diff --git a/ai-hub/app/api/routes/agents.py b/ai-hub/app/api/routes/agents.py index 9f4bfdf..456d7a7 100644 --- a/ai-hub/app/api/routes/agents.py +++ b/ai-hub/app/api/routes/agents.py @@ -117,6 +117,9 @@ raise HTTPException(status_code=404, detail="Instance not found") instance.status = request.status + if request.status == "idle": + instance.last_error = None + instance.evaluation_status = None db.commit() db.refresh(instance) return instance diff --git a/ai-hub/app/api/schemas.py b/ai-hub/app/api/schemas.py index d5b1473..54667a9 100644 --- a/ai-hub/app/api/schemas.py +++ b/ai-hub/app/api/schemas.py @@ -549,6 +549,7 @@ name: str description: Optional[str] = None system_prompt_path: Optional[str] = None + system_prompt_content: Optional[str] = None max_loop_iterations: int = 20 co_worker_quality_gate: bool = False rework_threshold: int = 80 diff --git a/ai-hub/app/core/orchestration/agent_loop.py b/ai-hub/app/core/orchestration/agent_loop.py index 2260809..1d8b6d0 100644 --- a/ai-hub/app/core/orchestration/agent_loop.py +++ b/ai-hub/app/core/orchestration/agent_loop.py @@ -263,19 +263,42 @@ if not safe_commit(): return content_buffer = "" - # Accumulate round metrics into task totals + # --- Persistence: Update Cumulative Metrics in DB (Real-time) --- + instance = db.query(AgentInstance).filter(AgentInstance.id == agent_id).first() + if instance: + instance.total_input_tokens = (instance.total_input_tokens or 0) + round_input_tokens + instance.total_output_tokens = (instance.total_output_tokens or 0) + round_output_tokens + + # Merge tool counts + if round_tool_counts: + current_counts = (instance.tool_call_counts or {}).copy() + for tool, counts in round_tool_counts.items(): + if tool not in current_counts: + current_counts[tool] = {"calls": 0, "successes": 0, "failures": 0} + + # Handle both dict and legacy int formats; a key missing from a dict counts as 0 + c_inc = counts.get("calls", 0) if isinstance(counts, dict) else counts + s_inc = counts.get("successes", 0) if isinstance(counts, 
dict) else counts + f_inc = counts.get("failures", 0) if isinstance(counts, dict) else 0 + + # Handle existing int in DB + if isinstance(current_counts[tool], int): + current_counts[tool] = {"calls": current_counts[tool], "successes": current_counts[tool], "failures": 0} + + current_counts[tool]["calls"] += c_inc + current_counts[tool]["successes"] += s_inc + current_counts[tool]["failures"] += f_inc + + instance.tool_call_counts = current_counts + from sqlalchemy.orm.attributes import flag_modified + flag_modified(instance, "tool_call_counts") + + if not safe_commit(): return + + # Accumulate round metrics into task local totals (for final logging/trace if needed) total_task_input_tokens += round_input_tokens total_task_output_tokens += round_output_tokens - for tool, counts in round_tool_counts.items(): - if tool not in total_task_tool_counts: - total_task_tool_counts[tool] = {"calls": 0, "successes": 0, "failures": 0} - if isinstance(counts, dict): - total_task_tool_counts[tool]["calls"] += counts.get("calls", 0) - total_task_tool_counts[tool]["successes"] += counts.get("successes", 0) - total_task_tool_counts[tool]["failures"] += counts.get("failures", 0) - else: # Legacy int support - total_task_tool_counts[tool]["calls"] += counts - total_task_tool_counts[tool]["successes"] += counts + # (total_task_tool_counts merging logic removed here as we update DB directly) exec_duration = time.time() - execution_start round_sub_events.append({"name": "Agent execution", "duration": round(exec_duration, 2), "timestamp": time.time()}) @@ -498,8 +521,6 @@ # Update metrics regardless of final status (as long as we finished the loop) elapsed = int(time.time() - loop_start) instance.total_running_time_seconds = (instance.total_running_time_seconds or 0) + elapsed - instance.total_input_tokens = (instance.total_input_tokens or 0) + total_task_input_tokens - instance.total_output_tokens = (instance.total_output_tokens or 0) + total_task_output_tokens # Success calculation 
final_score = getattr(instance, 'latest_quality_score', 0) or 0 @@ -511,23 +532,6 @@ if final_score >= threshold or not co_worker_enabled: instance.successful_runs = (instance.successful_runs or 0) + 1 - if total_task_tool_counts: - import copy - current_counts = copy.deepcopy(instance.tool_call_counts or {}) - for k, v in total_task_tool_counts.items(): - if k in current_counts and isinstance(current_counts[k], int): - current_counts[k] = {"calls": current_counts[k], "successes": current_counts[k], "failures": 0} - if not isinstance(v, dict): - v = {"calls": v, "successes": v, "failures": 0} - if k not in current_counts: - current_counts[k] = {"calls": 0, "successes": 0, "failures": 0} - current_counts[k]["calls"] += v.get("calls", 0) - current_counts[k]["successes"] += v.get("successes", 0) - current_counts[k]["failures"] += v.get("failures", 0) - instance.tool_call_counts = current_counts - from sqlalchemy.orm.attributes import flag_modified - flag_modified(instance, "tool_call_counts") - # Clear reasoning as the task is now complete instance.last_reasoning = None if not safe_commit(): return diff --git a/ai-hub/app/db/models/agent.py b/ai-hub/app/db/models/agent.py index 473b9aa..a72b47b 100644 --- a/ai-hub/app/db/models/agent.py +++ b/ai-hub/app/db/models/agent.py @@ -12,6 +12,11 @@ description = Column(String, nullable=True) system_prompt_path = Column(String, nullable=True) max_loop_iterations = Column(Integer, default=20) + + @property + def system_prompt_content(self): + """Helper to return the prompt content, aliasing system_prompt_path for now.""" + return self.system_prompt_path # Co-Worker Loop Settings co_worker_quality_gate = Column(Boolean, default=False) diff --git a/ai-hub/integration_tests/test_agents.py b/ai-hub/integration_tests/test_agents.py index bf9b455..08f2ec0 100644 --- a/ai-hub/integration_tests/test_agents.py +++ b/ai-hub/integration_tests/test_agents.py @@ -199,3 +199,81 @@ # 6. 
Cleanup client.delete(f"{BASE_URL}/agents/{instance_id}", headers=_headers()) client.delete(f"{BASE_URL}/nodes/admin/{node_id}", params={"admin_id": admin_id}) + +def test_agent_metrics_reset(): + """ + Test Agent Metrics Tracking and Reset: + 1. Deploy agent + 2. Trigger sync webhook to generate metrics (tokens) + 3. Verify metrics are non-zero in agent instance + 4. Call /metrics/reset + 5. Verify metrics are zeroed + """ + node_id = f"test-metrics-node-{uuid.uuid4().hex[:8]}" + admin_id = os.getenv("SYNC_TEST_USER_ID", "") + + with httpx.Client(timeout=30.0) as client: + # 1. Register a test node + node_payload = { + "node_id": node_id, + "display_name": "Metrics Test Node", + "is_active": True, + "skill_config": {"shell": {"enabled": True}} + } + client.post(f"{BASE_URL}/nodes/admin", params={"admin_id": admin_id}, json=node_payload) + + # 2. Deploy Agent + deploy_payload = { + "name": "Metrics Agent", + "system_prompt": "You are a helpful assistant.", + "max_loop_iterations": 1, + "mesh_node_id": node_id, + "provider_name": "gemini", + "trigger_type": "webhook", + "default_prompt": "Hello", + "initial_prompt": None + } + r_deploy = client.post(f"{BASE_URL}/agents/deploy", json=deploy_payload, headers=_headers()) + assert r_deploy.status_code == 200 + instance_id = r_deploy.json()["instance_id"] + + # 3. Get webhook secret + r_trig = client.get(f"{BASE_URL}/agents/{instance_id}/triggers", headers=_headers()) + secret = next(t for t in r_trig.json() if t["trigger_type"] == "webhook")["webhook_secret"] + + # 4. Trigger SYNC Webhook to generate tokens + print("\n[test] Triggering SYNC Webhook to generate token usage...") + r_sync = client.post( + f"{BASE_URL}/agents/{instance_id}/webhook", + params={"token": secret, "sync": "true"}, + json={"prompt": "Write a short 2-sentence poem about gravity."} + ) + assert r_sync.status_code == 200 + + # 5. 
Verify Metrics captured + r_list = client.get(f"{BASE_URL}/agents", headers=_headers()) + agent = next(a for a in r_list.json() if a["id"] == instance_id) + + print(f"[test] Captured Metrics - Runs: {agent['total_runs']}, In Tokens: {agent['total_input_tokens']}, Out Tokens: {agent['total_output_tokens']}") + assert agent["total_runs"] >= 1 + assert agent["total_input_tokens"] > 0 + assert agent["total_output_tokens"] > 0 + + # 6. RESET METRICS + print("[test] Resetting metrics...") + r_reset = client.post(f"{BASE_URL}/agents/{instance_id}/metrics/reset", headers=_headers()) + assert r_reset.status_code == 200 + + # 7. Verify Zeroed + r_list_post = client.get(f"{BASE_URL}/agents", headers=_headers()) + agent_post = next(a for a in r_list_post.json() if a["id"] == instance_id) + + print(f"[test] Post-Reset Metrics - Runs: {agent_post['total_runs']}, In Tokens: {agent_post['total_input_tokens']}, Out Tokens: {agent_post['total_output_tokens']}") + assert agent_post["total_runs"] == 0 + assert agent_post["total_input_tokens"] == 0 + assert agent_post["total_output_tokens"] == 0 + assert agent_post["successful_runs"] == 0 + + # 8. Cleanup + client.delete(f"{BASE_URL}/agents/{instance_id}", headers=_headers()) + client.delete(f"{BASE_URL}/nodes/admin/{node_id}", params={"admin_id": admin_id}) diff --git a/frontend/src/features/agents/components/drilldown/EvaluationPanel.js b/frontend/src/features/agents/components/drilldown/EvaluationPanel.js index f81423f..3847b5b 100644 --- a/frontend/src/features/agents/components/drilldown/EvaluationPanel.js +++ b/frontend/src/features/agents/components/drilldown/EvaluationPanel.js @@ -51,13 +51,13 @@
- {entry.reason || entry.message} + {entry.reason || entry.message || entry.text || "No details provided"}
{entry.sub_events?.length > 0 && ( diff --git a/frontend/src/features/agents/components/drilldown/MetricsPanel.js b/frontend/src/features/agents/components/drilldown/MetricsPanel.js index 4ae59ef..81fac56 100644 --- a/frontend/src/features/agents/components/drilldown/MetricsPanel.js +++ b/frontend/src/features/agents/components/drilldown/MetricsPanel.js @@ -1,8 +1,18 @@ import React from 'react'; const MetricsPanel = ({ agent, tokenUsage, tokenError, flippedCards, setFlippedCards, handleResetMetrics, clearing }) => { + + const formatTimeLocal = (utcString) => { + if (!utcString) return 'Never'; + const dateStr = utcString.endsWith('Z') || utcString.includes('+') ? utcString : utcString + 'Z'; + return new Date(dateStr).toLocaleString(undefined, { + month: 'short', day: 'numeric', + hour: '2-digit', minute: '2-digit', second: '2-digit' + }); + }; + return ( -| Tool Surface | +Invocations | +Success | +Failure Rate | +
|---|---|---|---|
| {tool} | +{calls} | ++ + {successes} + + | +20 ? 'text-rose-500' : 'text-gray-400'}`}> + {failRate > 0 ? `${failRate}%` : 0%} + | +
| + No autonomous tool transactions recorded in this session environment. + | +|||