diff --git a/ai-hub/integration_tests/test_coworker_flow.py b/ai-hub/integration_tests/test_coworker_flow.py index d66daa2..b2591c5 100644 --- a/ai-hub/integration_tests/test_coworker_flow.py +++ b/ai-hub/integration_tests/test_coworker_flow.py @@ -41,21 +41,21 @@ instance_id = r_deploy.json()["instance_id"] # 3. Wait for agent to initialize (Status: evaluating) - print(f"\n[test] Waiting for agent {instance_id} to reach 'evaluating' status...") + print(f"\n[test] Waiting for agent {instance_id} to reach evaluation status...") found_evaluating = False sync_workspace_id = r_deploy.json().get("sync_workspace_id") - for _ in range(300): # 300s timeout + for _ in range(150): # ~300s timeout (150 polls x 2s sleep) r_agent = client.get(f"{BASE_URL}/agents/{instance_id}", headers=_headers()) if r_agent.status_code == 200: agent = r_agent.json() status = agent.get("evaluation_status") print(f" [debug] Current status: '{status}'") - if status and status != "None": + if status and status != "None" and "Executing" in status: found_evaluating = True break time.sleep(2) - assert found_evaluating, f"Agent did not reach 'evaluating' status." + assert found_evaluating, f"Agent did not reach evaluation status." # 4. 
Use the /nodes/{id}/fs/ls API to verify the .cortex folder existence params = {"path": ".cortex", "session_id": sync_workspace_id} @@ -119,7 +119,7 @@ print(f"\n[test] Waiting for agent {instance_id} to reach 'failed_limit' status...") failed_limit = False latest_score = None - for _ in range(300): # 300s timeout + for _ in range(150): # 300s timeout r_agents = client.get(f"{BASE_URL}/agents", headers=_headers()) if r_agents.status_code == 200: agents = r_agents.json() @@ -127,7 +127,7 @@ if agent: status = agent.get("evaluation_status") latest_score = agent.get("latest_quality_score") - if status == "failed_limit": + if status and "failed_limit" in status: failed_limit = True break time.sleep(2) @@ -176,16 +176,17 @@ client.post(f"{BASE_URL}/agents/{instance_id}/webhook", params={"token": secret}, json={"prompt": "Go!"}) found_reworking = False - for _ in range(120): + for _ in range(150): # 300s timeout r_agents = client.get(f"{BASE_URL}/agents", headers=_headers()) if r_agents.status_code == 200: agent = next((a for a in r_agents.json() if a["id"] == instance_id), None) - if agent and agent.get("evaluation_status") == "reworking": + status = agent.get("evaluation_status") if agent else "" + if status and "Rework" in status: found_reworking = True break time.sleep(2) - assert found_reworking, "Agent never entered 'reworking' status." + assert found_reworking, f"Agent never entered rework status. 
Current: {status}" sync_workspace_id = r_deploy.json().get("sync_workspace_id") r_ls = client.get(f"{BASE_URL}/nodes/{node_id}/fs/cat", params={"path": ".cortex/history.log", "session_id": sync_workspace_id}, headers=_headers()) diff --git a/ai-hub/integration_tests/test_coworker_full_journey.py b/ai-hub/integration_tests/test_coworker_full_journey.py index 4360dcd..d92e0be 100644 --- a/ai-hub/integration_tests/test_coworker_full_journey.py +++ b/ai-hub/integration_tests/test_coworker_full_journey.py @@ -47,7 +47,7 @@ "trigger_type": "webhook", "co_worker_quality_gate": True, "max_rework_attempts": 2, - "rework_threshold": 98, # Extremely high to force rework + "rework_threshold": 101, # Impossible to pass to force multiple rework rounds "default_prompt": test_prompt, } r_deploy = client.post(f"{BASE_URL}/agents/deploy", json=deploy_payload, headers=_headers()) @@ -65,7 +65,7 @@ print(f"\n[Journey] Starting tracking for Agent {instance_id}") seen_statuses = set() scores_log = [] - max_wait = 180 # 3 minutes total for a 2-rework journey + max_wait = 300 # 5 minutes total for a slow rework journey start_time = time.time() while time.time() - start_time < max_wait: @@ -99,7 +99,7 @@ # We expect to see various statuses throughout the loop print(f"Seen statuses: {seen_statuses}") # Minimum expected statuses to prove the loop happened - assert any("Analyzing" in s for s in seen_statuses), "Should have seen an Analysis phase" + assert any("Auditing" in s for s in seen_statuses) or any("Analyzing" in s for s in seen_statuses), "Should have seen an Auditing/Analysis phase" - # Check if rework happened (should have if score < 98) + # Check if rework happened (expected: rework_threshold is 101, so no score can pass) if any("Rework" in s for s in seen_statuses): diff --git a/ai-hub/integration_tests/test_file_sync.py b/ai-hub/integration_tests/test_file_sync.py index aaf1624..0e88006 100644 --- a/ai-hub/integration_tests/test_file_sync.py +++ b/ai-hub/integration_tests/test_file_sync.py @@ -811,8 +811,8 @@ return False print(f"[Case 512MB] Polling {NODE_2} for the 
file...") - node2_file = _poll_until(_check_node2_ls, timeout=300) - assert node2_file, f"512MB file {filename} did not reach {NODE_2} within 300s in full size." + node2_file = _poll_until(_check_node2_ls, timeout=600) + assert node2_file, f"512MB file {filename} did not reach {NODE_2} within 600s in full size." print(f"[Case 512MB] ✅ {NODE_2} verified 512MB file sync with correct size.") # Verify Server Mirror also saw it and recorded 512MB size