diff --git a/.gitignore b/.gitignore index 24e5896..e54aea4 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,4 @@ .env.gitbucket .env.ai **/.env* -app/CaudeCodeSourceCode/* \ No newline at end of file +CaudeCodeSourceCode/ \ No newline at end of file diff --git a/ai-hub/app/core/orchestration/agent_loop.py b/ai-hub/app/core/orchestration/agent_loop.py index ce5a73d..fc1ec88 100644 --- a/ai-hub/app/core/orchestration/agent_loop.py +++ b/ai-hub/app/core/orchestration/agent_loop.py @@ -222,6 +222,7 @@ # Check Rework Limits if current_attempt >= max_rework_attempts: instance.evaluation_status = "failed_limit" + instance.last_error = f"Co-Worker Gate: Quality fell below {rework_threshold}% after {max_rework_attempts} attempts. Final Score: {score}/100. Audit .cortex/feedback.md for details." db.commit() await evaluator.log_round(current_attempt + 1, score, "Failed quality gate after max attempts.") break # No more reworks @@ -231,14 +232,20 @@ instance.current_rework_attempt = current_attempt + 1 db.commit() - # We pass the history log and transcript (last_reasoning) to the delta analyst - # We need to fetch the history log first + # Fetch history for potential compaction or analysis cmd_res = await evaluator.assistant.dispatch_single(instance.mesh_node_id, "cat .cortex/history.log", session_id=evaluator.sync_workspace_id) hist_log = [] try: hist_log = json.loads(cmd_res.get("stdout", "[]")) except: pass - - directive_feedback = await evaluator.evaluate_delta(prompt, rubric_content, justification, hist_log, final_reasoning) + + # --- CONTEXT COMPACTION GATE (M4 Requirement) --- + # If we are entering Attempt 3 (current_attempt is 2), we compact the history + if current_attempt == 2: + directive_feedback = await evaluator.generate_compaction_summary(prompt, hist_log) + if registry and instance.mesh_node_id: + registry.emit(instance.mesh_node_id, "status_update", {"context_state": "compacted"}) + else: + directive_feedback = await evaluator.evaluate_delta(prompt, 
rubric_content, justification, hist_log, final_reasoning) # Log this round await evaluator.log_round(current_attempt + 1, score, justification) diff --git a/ai-hub/app/core/orchestration/harness_evaluator.py b/ai-hub/app/core/orchestration/harness_evaluator.py index 97ad806..16ed6a3 100644 --- a/ai-hub/app/core/orchestration/harness_evaluator.py +++ b/ai-hub/app/core/orchestration/harness_evaluator.py @@ -224,27 +224,66 @@ "justification": final_answer } + async def generate_compaction_summary(self, initial_prompt: str, history_log: List[Dict[str, Any]]) -> str: + """Micro-compaction strategy: Distills multiple failed rework attempts into a high-density directive.""" + if not self.llm_provider or not history_log: + return "COMPACTED DIRECTIVE: Resolve all remaining implementation gaps identified in previous rounds." + + failure_path = "" + for i, entry in enumerate(history_log): + failure_path += f"Attempt {i+1} (Score: {entry.get('score', 0)}): {entry.get('reason', 'Unknown failure')}\n" + + system_prompt = """You are a Quality Control Compactor. +You have analyzed 2+ failed attempts to solve a task. +Your goal is to distill all previous critiques, failures, and delta-analysis reports into a SINGLE, high-density 'Compacted Directive'. +Remove all repetitive context, filler, and conversational markers. Focus only on the 'Critical Delta' that remains unfixed. + +Format exactly as: +# COMPACTED DIRECTIVE +[Dense listing of required fixes]""" + + user_prompt = f"Original Mission: {initial_prompt}\n\nFailure History:\n{failure_path}\n\nGenerate the Compacted Directive now." + + try: + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ] + prediction = await self.llm_provider.acompletion(messages=messages, stream=False) + return prediction.choices[0].message.content + except Exception as e: + logger.error(f"[HarnessEvaluator] Compaction fault: {e}") + return "# COMPACTED DIRECTIVE\n- Critical implementation failure. 
Perform deep audit and stabilize core logic." + async def log_round(self, round_num: int, score: int, reason: str): - """Append-only record-keeping in history.log.""" + """Append-only record-keeping in history.log with hardening for mesh timeouts.""" if not self.assistant: return try: # Atomic Read-Modify-Write for the JSON log on the node + # Set a shorter timeout specifically for the log cat to prevent loop hangs cmd_res = await self.assistant.dispatch_single( self.mesh_node_id, "cat .cortex/history.log", - session_id=self.sync_workspace_id + session_id=self.sync_workspace_id, + timeout=5 # Aggressive timeout for logging ) + history = [] if cmd_res.get("status") == "SUCCESS": try: history = json.loads(cmd_res.get("stdout", "[]")) - except: + except Exception as je: + logger.warning(f"[HarnessEvaluator] history.log corruption detected, resetting: {je}") history = [] + elif cmd_res.get("status") in ("TIMEOUT", "OFFLINE"): + logger.error(f"[HarnessEvaluator] Mesh connection failed during log_round (Status: {cmd_res.get('status')}). Logging to Hub-fallback only.") + # We don't raise here to allow the main loop to continue even if the log write fails + return history.append({ "round": round_num, "score": score, - "reason": reason[:200] + "..." if len(reason) > 200 else reason, + "reason": reason[:400] + "..." 
if len(reason) > 400 else reason, "timestamp": time.time() }) @@ -255,4 +294,4 @@ session_id=self.sync_workspace_id ) except Exception as e: - logger.error(f"[HarnessEvaluator] Failed to log round: {e}") + logger.error(f"[HarnessEvaluator] Critical fault during log_round: {e}") diff --git a/ai-hub/hub.log b/ai-hub/hub.log index 5f27b55..63b0e07 100644 --- a/ai-hub/hub.log +++ b/ai-hub/hub.log @@ -1,4 +1,5 @@ nohup: ignoring input +WARNING:app.app:Failed to initialize TTS/STT: 'NoneType' object has no attribute 'split' INFO:app.core.tools.registry:Registered dynamic tool plugin: 'browser_automation_agent' INFO:app.core.tools.registry:Registered dynamic tool plugin: 'mesh_file_explorer' INFO:app.core.tools.registry:Registered dynamic tool plugin: 'mesh_inspect_drift' @@ -6,7 +7,7 @@ INFO:app.core.tools.registry:Registered dynamic tool plugin: 'mesh_terminal_control' INFO:app.core.tools.registry:Registered dynamic tool plugin: 'mesh_wait_tasks' INFO:app.core.tools.registry:Registered dynamic tool plugin: 'read_skill_artifact' -INFO: Started server process [73481] +INFO: Started server process [8915] INFO: Waiting for application startup. INFO:app.db.migrate:Starting database migrations... INFO:app.db.migrate:Column 'audio_path' already exists in 'messages'. @@ -30,16 +31,25 @@ INFO:app.app:[M6] Agent Orchestrator gRPC server started on port 50051. INFO:app.core.orchestration.scheduler:[Scheduler] Agent background services (Zombie Sweeper & CRON) started. INFO:app.core.skills.bootstrap:Checking for system skills bootstrapping... -INFO:app.core.skills.bootstrap:System skills bootstrap completed. 
+ERROR:app.app:Failed to bootstrap database elements: (sqlite3.OperationalError) no such column: users.full_name +[SQL: SELECT users.id AS users_id, users.oidc_id AS users_oidc_id, users.email AS users_email, users.username AS users_username, users.password_hash AS users_password_hash, users.full_name AS users_full_name, users.role AS users_role, users.group_id AS users_group_id, users.avatar_url AS users_avatar_url, users.created_at AS users_created_at, users.last_login_at AS users_last_login_at, users.preferences AS users_preferences +FROM users +WHERE users.role = ? + LIMIT ? OFFSET ?] +[parameters: ('admin', 1, 0)] +(Background on this error at: https://sqlalche.me/e/20/e3q8) INFO: Application startup complete. -INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) +ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 8002): address already in use +INFO: Waiting for application shutdown. +INFO:app.app:[M6] Stopping gRPC server... +INFO: Application shutdown complete. โ Loading configuration from app/config.yaml Application startup... --- โ๏ธ Application Configuration --- - ACTIVE_LLM_PROVIDER: gemini - - ALLOW_OIDC_LOGIN: False + - ALLOW_OIDC_LOGIN: True - ALLOW_PASSWORD_LOGIN: *** - - DATABASE_URL: sqlite:///./test_cortex.db + - DATABASE_URL: sqlite:///./data/integration_test_ai_hub.db - DATA_DIR: /app/data - DB_MODE: sqlite - DEEPSEEK_API_KEY: sk-a...6bf2 @@ -68,453 +78,16 @@ - SECRET_KEY: aYc2...leZI - SKILLS_DIR: /app/data/skills - STT_API_KEY: AIza...sKuI - - STT_MODEL_NAME: gemini-2.5-flash + - STT_MODEL_NAME: None - STT_PROVIDER: google_gemini - STT_PROVIDERS: {} - SUPER_ADMINS: ['axieyangb@gmail.com'] - TTS_API_KEY: AIza...sKuI - - TTS_MODEL_NAME: gemini-2.5-flash-preview-tts + - TTS_MODEL_NAME: None - TTS_PROVIDER: google_gemini - TTS_PROVIDERS: {} - TTS_VOICE_NAME: Kore - VERSION: 1.0.0 ------------------------------------ Creating database tables... 
- -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - [๐๐งน] Running Mirror Cleanup. Active Sessions: 0 - [๐๐งน] Running Mirror Cleanup. Active Sessions: 2 - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - -[92m04:27:13 - LiteLLM:INFO[0m: utils.py:3895 - -LiteLLM completion() model= deepseek-chat; provider = deepseek -INFO:LiteLLM: -LiteLLM completion() model= deepseek-chat; provider = deepseek - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. 
-================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. 
-================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. 
-================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - -INFO: 127.0.0.1:56846 - "HEAD /api/v1/users/login/local HTTP/1.1" 404 Not Found - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. 
-================================================== - -INFO: 127.0.0.1:51906 - "GET /api/v1/agent/version HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:51904 - "GET /api/v1/agent/version HTTP/1.1" 404 Not Found -INFO:app.core.grpc.services.grpc_server:[gRPC] Incoming RPC Call: /agent.AgentOrchestrator/SyncConfiguration -INFO:app.core.grpc.services.grpc_server:[gRPC] Incoming RPC Call: /agent.AgentOrchestrator/SyncConfiguration -INFO:app.core.grpc.services.grpc_server:[๐] SyncConfiguration REQUEST from test-node-2 (token prefix: eh7D...) -INFO:app.core.grpc.services.grpc_server:[๐] SyncConfiguration REQUEST from test-node-1 (token prefix: CKMs...) -WARNING:app.core.grpc.services.grpc_server:[๐] SyncConfiguration REJECTED test-node-2: Invalid token or unknown node. -WARNING:app.core.grpc.services.grpc_server:[๐] SyncConfiguration REJECTED test-node-1: Invalid token or unknown node. - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - -INFO:app.core.grpc.services.grpc_server:[gRPC] Incoming RPC Call: /agent.AgentOrchestrator/SyncConfiguration -INFO:app.core.grpc.services.grpc_server:[๐] SyncConfiguration REQUEST from test-node-2 (token prefix: eh7D...) -INFO:app.core.grpc.services.grpc_server:[gRPC] Incoming RPC Call: /agent.AgentOrchestrator/SyncConfiguration -INFO:app.core.grpc.services.grpc_server:[๐] SyncConfiguration REQUEST from test-node-1 (token prefix: CKMs...) -WARNING:app.core.grpc.services.grpc_server:[๐] SyncConfiguration REJECTED test-node-2: Invalid token or unknown node. -WARNING:app.core.grpc.services.grpc_server:[๐] SyncConfiguration REJECTED test-node-1: Invalid token or unknown node. - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. 
-================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - -[92m04:32:15 - LiteLLM:INFO[0m: utils.py:3895 - -LiteLLM completion() model= deepseek-chat; provider = deepseek -INFO:LiteLLM: -LiteLLM completion() model= deepseek-chat; provider = deepseek - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. 
-================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. 
-================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. 
-================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - - -================================================== -๐ก CORTEX MESH DASHBOARD | 0 Nodes Online --------------------------------------------------- - No nodes currently connected. -================================================== - +Application shutdown... diff --git a/ai-hub/integration_tests/test_coworker_flow.py b/ai-hub/integration_tests/test_coworker_flow.py new file mode 100644 index 0000000..15c081e --- /dev/null +++ b/ai-hub/integration_tests/test_coworker_flow.py @@ -0,0 +1,159 @@ +import pytest +import httpx +import os +import uuid +import time +from conftest import BASE_URL + +def _headers(): + uid = os.getenv("SYNC_TEST_USER_ID", "") + return {"X-User-ID": uid} + +def test_coworker_sc1_mirror_check(): + """ + SC-1 (Mirror Check): + 1. Deploy an agent with co_worker_quality_gate=True. + 2. Wait for the agent to initialize (Status: evaluating). + 3. Use the /nodes/{id}/fs/ls API to verify the .cortex folder existence. + """ + node_id = f"test-coworker-sc1-{uuid.uuid4().hex[:8]}" + admin_id = os.getenv("SYNC_TEST_USER_ID", "") + instance_id = None + + with httpx.Client(timeout=30.0) as client: + try: + # 1. 
Register a test node + node_payload = { + "node_id": node_id, + "display_name": "Co-Worker SC-1 Node", + "is_active": True, + "skill_config": {"shell": {"enabled": True}, "sync": {"enabled": True}} + } + r_node = client.post(f"{BASE_URL}/nodes/admin", params={"admin_id": admin_id}, json=node_payload) + assert r_node.status_code == 200, f"Node registration failed: {r_node.text}" + + # 2. Deploy Agent with co_worker_quality_gate=True + deploy_payload = { + "name": "SC-1 Mirror Agent", + "description": "Tests .cortex mirror initialization", + "system_prompt": "You are a test agent. Create a simple hello world python script.", + "max_loop_iterations": 1, + "mesh_node_id": node_id, + "provider_name": "gemini", + "model_name": "gemini-1.5-flash", # Explicitly use flash + "trigger_type": "interval", + "interval_seconds": 60, # Long interval so it doesn't run twice during test + "co_worker_quality_gate": True, + "default_prompt": "Create app.py that prints hello.", + } + r_deploy = client.post(f"{BASE_URL}/agents/deploy", json=deploy_payload, headers=_headers()) + assert r_deploy.status_code == 200, f"Deploy failed: {r_deploy.text}" + instance_id = r_deploy.json()["instance_id"] + + # 3. Wait for agent to initialize (Status: evaluating) + print(f"\n[test] Waiting for agent {instance_id} to reach 'evaluating' status...") + found_evaluating = False + for _ in range(30): # 60s timeout + r_agent = client.get(f"{BASE_URL}/agents/{instance_id}", headers=_headers()) + if r_agent.status_code == 200: + agent = r_agent.json() + if agent.get("evaluation_status") == "evaluating": + found_evaluating = True + break + time.sleep(2) + + assert found_evaluating, f"Agent did not reach 'evaluating' status." + + # 4. 
Use the /nodes/{id}/fs/ls API to verify the .cortex folder existence + r_ls = client.get(f"{BASE_URL}/nodes/{node_id}/fs/ls", params={"path": ".cortex"}, headers=_headers()) + assert r_ls.status_code == 200, f"Failed to ls .cortex: {r_ls.text}" + files = r_ls.json() + filenames = [f["name"] for f in files] + # Verify rubric.md and history.log are present as per test plan + assert any("rubric.md" in f for f in filenames), f"rubric.md not found in {filenames}" + assert any("history.log" in f for f in filenames), f"history.log not found in {filenames}" + + finally: + if instance_id: + client.delete(f"{BASE_URL}/agents/{instance_id}", headers=_headers()) + client.delete(f"{BASE_URL}/nodes/admin/{node_id}", params={"admin_id": admin_id}) + +def test_coworker_sc3_limit_check(): + """ + SC-3 (Limit Check): + 1. Deploy an agent with max_rework_attempts=1 and rework_threshold=100. + 2. Trigger a run. + 3. Poll the /agents endpoint until evaluation_status == 'failed_limit'. + 4. Verify the latest_quality_score is present in the response. + """ + node_id = f"test-coworker-sc3-{uuid.uuid4().hex[:8]}" + admin_id = os.getenv("SYNC_TEST_USER_ID", "") + instance_id = None + + with httpx.Client(timeout=30.0) as client: + try: + # 1. Register a test node + node_payload = { + "node_id": node_id, + "display_name": "Co-Worker SC-3 Node", + "is_active": True, + "skill_config": {"shell": {"enabled": True}, "sync": {"enabled": True}} + } + r_node = client.post(f"{BASE_URL}/nodes/admin", params={"admin_id": admin_id}, json=node_payload) + assert r_node.status_code == 200, f"Node registration failed: {r_node.text}" + + # 2. Deploy Agent with max_rework_attempts=1 and rework_threshold=100 + deploy_payload = { + "name": "SC-3 Limit Agent", + "system_prompt": "You are a test agent. 
Create a simple hello world python script.", + "max_loop_iterations": 1, + "mesh_node_id": node_id, + "provider_name": "gemini", + "model_name": "gemini-1.5-flash", + "trigger_type": "webhook", # Use webhook to trigger manually + "co_worker_quality_gate": True, + "max_rework_attempts": 1, + "rework_threshold": 100, # Impossible to pass + "default_prompt": "Create app.py that prints hello.", + } + r_deploy = client.post(f"{BASE_URL}/agents/deploy", json=deploy_payload, headers=_headers()) + assert r_deploy.status_code == 200, f"Deploy failed: {r_deploy.text}" + instance_id = r_deploy.json()["instance_id"] + + # 3. Get the webhook secret and trigger it + r_trig = client.get(f"{BASE_URL}/agents/{instance_id}/triggers", headers=_headers()) + webhook_trigger = next(t for t in r_trig.json() if t["trigger_type"] == "webhook") + secret = webhook_trigger["webhook_secret"] + + r_hook = client.post( + f"{BASE_URL}/agents/{instance_id}/webhook", + params={"token": secret}, + json={"prompt": "Go!"} + ) + assert r_hook.status_code == 202 + + # 4. Poll until evaluation_status == 'failed_limit' + print(f"\n[test] Waiting for agent {instance_id} to reach 'failed_limit' status...") + failed_limit = False + latest_score = None + for _ in range(60): # 120s timeout + r_agents = client.get(f"{BASE_URL}/agents", headers=_headers()) + if r_agents.status_code == 200: + agents = r_agents.json() + agent = next((a for a in agents if a["id"] == instance_id), None) + if agent: + status = agent.get("evaluation_status") + latest_score = agent.get("latest_quality_score") + if status == "failed_limit": + failed_limit = True + break + time.sleep(2) + + assert failed_limit, f"Agent did not reach 'failed_limit' status." 
+ assert latest_score is not None, "latest_quality_score should be present in the response" + + finally: + if instance_id: + client.delete(f"{BASE_URL}/agents/{instance_id}", headers=_headers()) + client.delete(f"{BASE_URL}/nodes/admin/{node_id}", params={"admin_id": admin_id}) + diff --git a/ai-hub/test.db-shm b/ai-hub/test.db-shm index 17b4d6b..a54b925 100644 --- a/ai-hub/test.db-shm +++ b/ai-hub/test.db-shm Binary files differ diff --git a/docs/features/harness_engineering/harness_tasks/foundation.md b/docs/features/harness_engineering/harness_tasks/foundation.md index 0f402ad..37af367 100644 --- a/docs/features/harness_engineering/harness_tasks/foundation.md +++ b/docs/features/harness_engineering/harness_tasks/foundation.md @@ -12,9 +12,9 @@ - [x] `co_worker_enabled`: (Boolean) Default: `False`. - [x] `rework_threshold`: (Integer) Range 0-100. Default: `80`. - [x] `max_rework_count`: (Integer) Default: `3`. -- [ ] **Workspace Mirroring**: - - [ ] Create `.cortex/` directory in the agent's unique jail during initialization. - - [ ] Implement `history.log` append logic (JSON format). +- [x] **Workspace Mirroring**: + - [x] Create `.cortex/` directory in the agent's unique jail during initialization. + - [x] Implement `history.log` append logic (JSON format). ## Claude Code Inspiration: Memory Context *Reference: `src/memdir/memdir.ts`* diff --git a/docs/features/harness_engineering/harness_tasks/orchestration.md b/docs/features/harness_engineering/harness_tasks/orchestration.md index a22c898..34a5f41 100644 --- a/docs/features/harness_engineering/harness_tasks/orchestration.md +++ b/docs/features/harness_engineering/harness_tasks/orchestration.md @@ -8,15 +8,15 @@ Implement the logic that triggers the Co-Worker agent at pre-run and post-run phases, managing the dual-stage evaluation. ## Task Breakdown -- [ ] **Request-Specific Rubric Generator**: - - [ ] Implement a pre-execution hook in `agent_loop.py`. 
- - [ ] Prompt the Co-Pilot to generate a task-specific `rubric.md`. -- [ ] **Dual-Stage Post-Run Hook**: - - [ ] **Stage 2A (Blind Rating)**: Implement gRPC/Executor logic to call the Co-Pilot with a stripped context. - - [ ] **Stage 2B (Delta Analysis)**: Implement context-aware gap discovery (Score-Anonymized). -- [ ] **Directive-Based Rework Injection**: - - [ ] Update the `agent_loop.py` rework trigger logic. - - [ ] Instead of passing raw feedback, format the Co-Worker's gaps into a **Directive block** (e.g., *"Actionable Command: Refactor X to resolve Y"*). +- [x] **Request-Specific Rubric Generator**: + - [x] Implement a pre-execution hook in `agent_loop.py`. + - [x] Prompt the Co-Pilot to generate a task-specific `rubric.md`. +- [x] **Dual-Stage Post-Run Hook**: + - [x] **Stage 2A (Blind Rating)**: Implement gRPC/Executor logic to call the Co-Pilot with a stripped context. + - [x] **Stage 2B (Delta Analysis)**: Implement context-aware gap discovery (Score-Anonymized). +- [x] **Directive-Based Rework Injection**: + - [x] Update the `agent_loop.py` rework trigger logic. + - [x] Instead of passing raw feedback, format the Co-Worker's gaps into a **Directive block** (e.g., *"Actionable Command: Refactor X to resolve Y"*). - [ ] **Context Compaction Gate**: - [ ] Implement a logic to detect token usage/turn count in the rework loop. - [ ] If `Attempts > 2`, trigger the Co-Pilot to summarize the `.cortex/history.log` and replace the full rework history with a **Compacted Delta** for the Main Agent. 
diff --git a/docs/features/harness_engineering/harness_tasks/test_plan.md b/docs/features/harness_engineering/harness_tasks/test_plan.md new file mode 100644 index 0000000..d57c86d --- /dev/null +++ b/docs/features/harness_engineering/harness_tasks/test_plan.md @@ -0,0 +1,52 @@ +# Co-Worker Harness: Integration Test Plan + +## Objective +To verify the full end-to-end "Co-Worker" lifecycle, ensuring that the self-improvement loop (Deployment -> Rubric -> Evaluation -> Rework -> Resolution) functions correctly across the API and Mesh node layers. + +## 1. Test Scenarios + +### SC-1: Foundation & Mirroring +- **Goal**: Verify that enabling the quality gate correctly initializes the agent's workspace. +- **Steps**: + 1. Deploy an agent with `co_worker_quality_gate=True`. + 2. Wait for the first interval trigger. + 3. Call `/nodes/{node_id}/fs/ls?path=.cortex` to verify the directory exists. + 4. Verify `rubric.md` and `history.log` are initialized. + +### SC-2: The Rework Loop (Failure -> Success) +- **Goal**: Verify the agent can fail a gate, receive a directive, and then pass. +- **Setup**: Use a "Contradictory Requirement" to force a failure. + - **Initial Prompt**: "Write a python script `app.py` that prints 'Hello World'." + - **Rubric Generation**: We expect the rubric to require basic script functionality. +- **Verification**: + - `AgentInstance.evaluation_status` should transition: `evaluating` -> `reworking` -> `evaluating` -> `passed`. + - `history.log` should contain at least one entry with the low score and justification. + +### SC-3: Max Attempts Gate (Failure -> Termination) +- **Goal**: Ensure the agent doesn't loop infinitely. +- **Steps**: + 1. Set `rework_threshold=100` (nearly impossible). + 2. Set `max_rework_attempts=2`. + 3. Trigger the agent. + 4. Verify that after 2 rework attempts, the `evaluation_status` becomes `failed_limit`. + 5. Verify `AgentInstance.last_error` contains the post-mortem summary. 
+ +### SC-4: Context Compaction +- **Goal**: Verify that Attempt 3 uses a compacted prompt. +- **Steps**: + 1. Deploy agent with `max_rework_attempts=3`. + 2. Trigger enough reworks to reach Attempt 3. + 3. (Advanced) Mock or inspect the registry events to see the `context_state: compacted` event. + +## 2. API Verification Points + +| Endpoint | Method | Expected Data | +| :--- | :--- | :--- | +| `/agents` | GET | `co_worker_quality_gate`, `latest_quality_score` | +| `/agents/{id}/triggers` | GET | `rework_threshold`, `max_rework_attempts` | +| `/nodes/{node_id}/fs/cat` | GET | Read `.cortex/history.log` and `.cortex/feedback.md` | + +## 3. Implementation Plan (for Test Agent) +1. Create `ai-hub/integration_tests/test_coworker_flow.py`. +2. Implement utility `wait_for_evaluation(instance_id, target_status, timeout=120)`. +3. Use the `gemini-2.0-flash` model for fast integration testing. diff --git a/frontend/src/features/agents/components/AgentHarnessPage.js b/frontend/src/features/agents/components/AgentHarnessPage.js index dd67497..ffb1c0e 100644 --- a/frontend/src/features/agents/components/AgentHarnessPage.js +++ b/frontend/src/features/agents/components/AgentHarnessPage.js @@ -1,6 +1,7 @@ import React, { useState, useEffect } from 'react'; import { getAgents, getAgentTelemetry, updateAgentStatus, deployAgent, deleteAgent, getUserConfig, getUserAccessibleNodes, getAgentTriggers } from '../../../services/apiService'; import { AreaChart, Area, XAxis, YAxis, Tooltip, ResponsiveContainer } from 'recharts'; +import BuddyAvatar from './BuddyAvatar'; // Polling interval in ms const POLLING_INTERVAL = 5000; diff --git a/frontend/src/features/agents/components/BuddyAvatar.js b/frontend/src/features/agents/components/BuddyAvatar.js new file mode 100644 index 0000000..dc04303 --- /dev/null +++ b/frontend/src/features/agents/components/BuddyAvatar.js @@ -0,0 +1,68 @@ +import React from 'react'; + +/** + * BuddyAvatar: A mood-based visual indicator for the Co-Worker
agent's state. + * Renders different expressions based on quality score and evaluation status. + */ +export const BuddyAvatar = ({ score, status }) => { + let expression = '๐'; // Neutral/Happy + let moodColor = 'text-emerald-500'; + let borderColor = 'border-emerald-500/20'; + let bgColor = 'bg-emerald-500/10'; + let label = 'Healthy'; + + if (status === 'evaluating') { + expression = '๐ง'; // Analyzing + moodColor = 'text-indigo-500'; + borderColor = 'border-indigo-500/30'; + bgColor = 'bg-indigo-500/10'; + label = 'Auditing...'; + } else if (status === 'reworking') { + expression = '๐ ๏ธ'; // Reworking + moodColor = 'text-amber-500'; + borderColor = 'border-amber-500/30'; + bgColor = 'bg-amber-500/10'; + label = 'Refining'; + } else if (status === 'failed_limit') { + expression = '๐ค'; // Stern/Critical - FAILED AFTER MAX ATTEMPTS + moodColor = 'text-rose-500'; + borderColor = 'border-rose-500/40'; + bgColor = 'bg-rose-500/20'; + label = 'CRITICAL FAILURE'; + } else if (score < 60 && score > 0) { + expression = '๐'; // Low score concern + moodColor = 'text-rose-400'; + borderColor = 'border-rose-400/30'; + bgColor = 'bg-rose-400/5'; + label = 'Sub-Par'; + } else if (score >= 80) { + expression = 'โจ'; // Excellent + moodColor = 'text-emerald-400'; + borderColor = 'border-emerald-400/30'; + bgColor = 'bg-emerald-400/10'; + label = 'Optimal'; + } + + return ( +