diff --git a/ai-hub/app/api/dependencies.py b/ai-hub/app/api/dependencies.py index 4946e46..11de447 100644 --- a/ai-hub/app/api/dependencies.py +++ b/ai-hub/app/api/dependencies.py @@ -32,13 +32,15 @@ # HARDENING: In production, X-User-ID must be verified via a shared secret from the proxy from app.config import settings - if settings.SECRET_KEY and settings.SECRET_KEY not in ["dev", "generate-me"]: - if not x_proxy_secret or x_proxy_secret != settings.SECRET_KEY: - logging.warning(f"Invalid X-Proxy-Secret from {x_user_id}. Identity claim rejected.") - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Invalid Proxy Secret. Identity claim rejected." - ) + if settings.SECRET_KEY and settings.SECRET_KEY not in ["dev", "generate-me", "dev-secret-key-1337"]: + # Strict enforcement only if OIDC is disabled or if the secret is provided (to verify it) + if not settings.OIDC_ENABLED or x_proxy_secret: + if not x_proxy_secret or x_proxy_secret != settings.SECRET_KEY: + logging.warning(f"Invalid X-Proxy-Secret from {x_user_id}. Identity claim rejected.") + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Invalid Proxy Secret. Identity claim rejected." + ) user = db.query(models.User).filter(models.User.id == x_user_id).first() if not user: diff --git a/ai-hub/app/api/schemas.py b/ai-hub/app/api/schemas.py index 287a0a7..d5b1473 100644 --- a/ai-hub/app/api/schemas.py +++ b/ai-hub/app/api/schemas.py @@ -296,6 +296,9 @@ # The timestamp for when the message was created. created_at: datetime + # Metadata such as quality audits, tool usage stats, etc. + message_metadata: Optional[dict] = None + # URL to the saved audio file audio_url: Optional[str] = None # Whether audio exists for this message diff --git a/ai-hub/app/core/orchestration/agent_loop.py b/ai-hub/app/core/orchestration/agent_loop.py index c78442a..a387693 100644 --- a/ai-hub/app/core/orchestration/agent_loop.py +++ b/ai-hub/app/core/orchestration/agent_loop.py @@ -171,6 +171,9 @@ current_attempt = 0 final_result = None + if evaluator: + await evaluator.log_event("Execution Initialized", "Agent loop warming up for primary task execution.") + # --- MAIN REWORK LOOP --- loop_start = time.time() # Handle scope for exception reporting while current_attempt <= max_rework_attempts: @@ -185,6 +188,7 @@ final_input_tokens = 0 final_output_tokens = 0 final_answer = "" + last_assistant_msg_id = None instance = db.query(AgentInstance).filter(AgentInstance.id == agent_id).first() instance.last_reasoning = "" @@ -210,6 +214,7 @@ user_service=user_service ): if event.get("type") == "finish": + last_assistant_msg_id = event.get("message_id") final_tool_counts = event.get("tool_counts", {}) final_input_tokens = event.get("input_tokens", 0) final_output_tokens = event.get("output_tokens", 0) @@ -278,7 +283,13 @@ if registry and instance.mesh_node_id: registry.emit(instance.mesh_node_id, "status_update", {"evaluation_status": instance.evaluation_status}) - blind_eval = await evaluator.evaluate_blind(prompt, rubric_content, final_answer) + # transparency context for Auditor + partner_ctx = { + "system_prompt": template.system_prompt_content, + "skills": [s.name for s in (agent_session.skills if agent_session and agent_session.skills else [])] + } + + blind_eval = await evaluator.evaluate_blind(prompt, rubric_content, final_answer, partner_context=partner_ctx) score = blind_eval.get("score", 0) justification = blind_eval.get("justification", "") blind_duration = blind_eval.get("duration", 0) @@ -306,6 +317,28 @@ if len(summary_reason) > 250: summary_reason = summary_reason[:247] + "..." await evaluator.log_round(current_attempt + 1, score, summary_reason, "Final answer passed quality gate.", sub_events=round_sub_events, duration=total_success_duration) + + # PERSISTENCE: Save this audit to the message for historical drill-down + if last_assistant_msg_id: + current_history = [] + try: + cmd_res = evaluator.assistant.dispatch_single(evaluator.mesh_node_id, "cat .cortex/history.log", session_id=evaluator.sync_workspace_id) + current_history = json.loads(cmd_res.get("stdout", "[]")) + except: pass + + db.query(Message).filter(Message.id == last_assistant_msg_id).update({ + "message_metadata": { + "evaluation": { + "rubric": rubric_content, + "feedback": success_feedback, + "history": current_history, + "score": score, + "passed": True + } + } + }) + if not safe_commit(): return + break # Success! # Check Rework Limits @@ -320,6 +353,28 @@ if len(summary_reason) > 250: summary_reason = summary_reason[:247] + "..." await evaluator.log_round(current_attempt + 1, score, summary_reason, "Failed quality gate after max attempts.", sub_events=round_sub_events, duration=total_fail_duration) + + # PERSISTENCE: Save this status to the message for historical drill-down + if last_assistant_msg_id: + current_history = [] + try: + cmd_res = evaluator.assistant.dispatch_single(evaluator.mesh_node_id, "cat .cortex/history.log", session_id=evaluator.sync_workspace_id) + current_history = json.loads(cmd_res.get("stdout", "[]")) + except: pass + + db.query(Message).filter(Message.id == last_assistant_msg_id).update({ + "message_metadata": { + "evaluation": { + "rubric": rubric_content, + "feedback": f"# Evaluation Failed (Max Attempts)\n\n**Score**: {score}/100\n\n**Justification**:\n{justification}", + "history": current_history, + "score": score, + "passed": False + } + } + }) + if not safe_commit(): return + break # No more reworks # Stage Delta (Gap Analysis) @@ -341,7 +396,7 @@ directive_feedback = await evaluator.generate_compaction_summary(prompt, hist_log) round_sub_events.append({"name": "Context compaction", "duration": round(time.time() - delta_start, 2), "timestamp": time.time()}) else: - directive_feedback = await evaluator.evaluate_delta(prompt, rubric_content, justification, hist_log, final_reasoning) + directive_feedback = await evaluator.evaluate_delta(prompt, rubric_content, justification, hist_log, final_reasoning, partner_context=partner_ctx) round_sub_events.append({"name": "Delta analysis", "duration": round(time.time() - delta_start, 2), "timestamp": time.time()}) # M3: Categorization & Duration Metrics @@ -359,6 +414,27 @@ # Log this round with summary and duration await evaluator.log_round(current_attempt + 1, score, summary_reason, directive_feedback, sub_events=round_sub_events, duration=total_round_duration) + # PERSISTENCE: Save this audit to the message for historical drill-down + if last_assistant_msg_id: + current_history = [] + try: + cmd_res = evaluator.assistant.dispatch_single(evaluator.mesh_node_id, "cat .cortex/history.log", session_id=evaluator.sync_workspace_id) + current_history = json.loads(cmd_res.get("stdout", "[]")) + except: pass + + db.query(Message).filter(Message.id == last_assistant_msg_id).update({ + "message_metadata": { + "evaluation": { + "rubric": rubric_content, + "feedback": full_audit_stream, + "history": current_history, + "score": score, + "passed": False + } + } + }) + if not safe_commit(): return + # Trigger next iteration current_prompt = f"### CO-WORKER DIRECTIVE (ATTEMPT {current_attempt + 1})\n\n{directive_feedback}\n\nProceed with rework." current_attempt += 1 @@ -417,6 +493,10 @@ instance.last_reasoning = None if not safe_commit(): return + if evaluator: + total_elapsed = time.time() - loop_start + await evaluator.log_event("Process Completed", f"Lifecycle finished successfully after {current_attempt + 1} rounds.", duration=total_elapsed) + return final_result except Exception as e: diff --git a/ai-hub/app/core/orchestration/harness_evaluator.py b/ai-hub/app/core/orchestration/harness_evaluator.py index 5c71266..2f88e0b 100644 --- a/ai-hub/app/core/orchestration/harness_evaluator.py +++ b/ai-hub/app/core/orchestration/harness_evaluator.py @@ -239,15 +239,28 @@ logger.error(f"[HarnessEvaluator] Rubric generation failed: {e}") return None - async def evaluate_blind(self, initial_prompt: str, rubric_content: str, result_content: str) -> Dict[str, Any]: + async def evaluate_blind(self, initial_prompt: str, rubric_content: str, result_content: str, partner_context: Dict = None) -> Dict[str, Any]: """Stage 2A: The Blind Rating (Absolute Objectivity). Uses tools to inspect results.""" start = time.time() + partner_info = "" + if partner_context: + partner_info = f""" +## PARTNER PROFILE (Main Agent context): +SYSTEM PROMPT: +{partner_context.get('system_prompt', 'N/A')} + +AVAILABLE SKILLS (Tools): +{partner_context.get('skills', 'N/A')} +--- +""" + system_prompt = f"""You are the Co-Worker Evaluator (Blind Auditor). Your goal is to perform a BLIND evaluation of the Main Agent's work. You have NO knowledge of previous rounds or internal reasoning. You only see the goal and the result. Original Request: {initial_prompt} +{partner_info} Current Result: --- @@ -272,7 +285,7 @@ res["duration"] = time.time() - start return res - async def evaluate_delta(self, initial_prompt: str, rubric_content: str, blind_justification: str, history_log: List[Dict[str, Any]], transcript: str) -> str: + async def evaluate_delta(self, initial_prompt: str, rubric_content: str, blind_justification: str, history_log: List[Dict[str, Any]], transcript: str, partner_context: Dict = None) -> str: """Stage 2B: The Delta Analysis. Identifies gaps by comparing result to reasoning transcript.""" start = time.time() @@ -300,10 +313,21 @@ {transcript} --- +## PARTNER PROFILE (Main Agent context): +SYSTEM PROMPT: +{partner_context.get('system_prompt', 'N/A') if partner_context else 'N/A'} + +AVAILABLE SKILLS (Tools): +{partner_context.get('skills', 'N/A') if partner_context else 'N/A'} +--- + MISSION: -1. Identify the 'Delta' (what was intended vs. what was actually committed). -2. Spot 'Gold-Plating' (did they do extra work not asked for?). -3. Format feedback as ACTIONABLE COMMANDS (Directives). +1. Compare the Blind Justification with the Execution Transcript. +2. Identify why the Agent failed to meet the criteria despite its reasoning. +3. Identify the 'Delta' (what was intended vs. what was actually committed). +4. Spot 'Gold-Plating' (did they do extra work not asked for?). +5. Format feedback as ACTIONABLE COMMANDS (Directives). +6. If this is a repeat failure, provide a solution sketch or absolute directive. Example: "Directive: Refactor auth.py:L10 to use settings.API_KEY instead of hardcoded string." Format as Markdown. Start with '# Rework Instructions'.""" diff --git a/ai-hub/app/core/skills/bootstrap.py b/ai-hub/app/core/skills/bootstrap.py index 7e276ca..af5c3fd 100644 --- a/ai-hub/app/core/skills/bootstrap.py +++ b/ai-hub/app/core/skills/bootstrap.py @@ -28,7 +28,7 @@ # but preserve the skill's identity. logger.info(f"Syncing system skill: {skill_def['name']}") existing.description = skill_def.get("description") - existing.skill_type = skill_def.get("skill_type") + existing.skill_type = skill_def.get("skill_type", "local") existing.is_enabled = skill_def.get("is_enabled", True) existing.features = skill_def.get("features", ["swarm_control"]) existing.extra_metadata = skill_def.get("extra_metadata", {}) @@ -41,7 +41,7 @@ target_skill = models.Skill( name=skill_def["name"], description=skill_def.get("description"), - skill_type=skill_def.get("skill_type"), + skill_type=skill_def.get("skill_type", "local"), is_enabled=skill_def.get("is_enabled", True), features=skill_def.get("features", ["swarm_control"]), extra_metadata=skill_def.get("extra_metadata", {}), diff --git a/deployment/test-nodes/docker-compose.test-nodes.yml b/deployment/test-nodes/docker-compose.test-nodes.yml index c9cba88..732f6b9 100644 --- a/deployment/test-nodes/docker-compose.test-nodes.yml +++ b/deployment/test-nodes/docker-compose.test-nodes.yml @@ -5,7 +5,7 @@ services: test-node-1: build: - context: ../../agent-node + context: ./agent-node container_name: cortex-test-1 environment: - AGENT_NODE_ID=test-node-1 @@ -29,7 +29,7 @@ test-node-2: build: - context: ../../agent-node + context: ./agent-node container_name: cortex-test-2 environment: - AGENT_NODE_ID=test-node-2 diff --git a/frontend/src/features/agents/components/AgentDrillDown.js b/frontend/src/features/agents/components/AgentDrillDown.js index e1fedbb..607e53f 100644 --- a/frontend/src/features/agents/components/AgentDrillDown.js +++ b/frontend/src/features/agents/components/AgentDrillDown.js @@ -38,11 +38,16 @@ const [coworkerContent, setCoworkerContent] = useState(""); const [historyLog, setHistoryLog] = useState([]); const [savingGroundTruth, setSavingGroundTruth] = useState(false); + const [selectedAuditId, setSelectedAuditId] = useState(null); // null means 'Live State' from node // Monitoring & Timer States const [runningSeconds, setRunningSeconds] = useState(0); const [lastTotalConsumption, setLastTotalConsumption] = useState(null); const [previousStatus, setPreviousStatus] = useState('idle'); + const [currentAction, setCurrentAction] = useState(null); + const [lastAction, setLastAction] = useState(null); + const [lastActionDuration, setLastActionDuration] = useState(null); + const [actionStartTime, setActionStartTime] = useState(0); // Helper: Convert cron expression to human-readable text const describeCron = (expr) => { @@ -126,7 +131,8 @@ sender: m.sender, timestamp: m.created_at, id: m.id, - tool_calls: m.tool_calls + tool_calls: m.tool_calls, + message_metadata: m.message_metadata })); setChatHistory(formatted); @@ -150,10 +156,12 @@ } catch(e) {} // Fetch Evaluation Hub data (.cortex/) - if (found.mesh_node_id && (found.session?.sync_workspace_id || found.session_id)) { + const sid = found.session?.sync_workspace_id || found.session_id; + const nodeId = found.mesh_node_id || "hub"; + + if (sid) { try { - const sid = found.session?.sync_workspace_id || found.session_id; - const cFilesListing = await getAgentCortexFiles(agentId, found.mesh_node_id, sid); + const cFilesListing = await getAgentCortexFiles(agentId, nodeId, sid); const files = cFilesListing.files || []; setCortexFiles(files); @@ -162,7 +170,7 @@ // Stream feedback.md if (fileExists("feedback.md")) { try { - const feedback = await getAgentCortexFile(agentId, found.mesh_node_id, sid, "feedback.md"); + const feedback = await getAgentCortexFile(agentId, nodeId, sid, "feedback.md"); setFeedbackContent(feedback?.content || ""); } catch (e) {} } @@ -170,21 +178,21 @@ // Display rubric.md if (fileExists("rubric.md")) { try { - const rubric = await getAgentCortexFile(agentId, found.mesh_node_id, sid, "rubric.md"); + const rubric = await getAgentCortexFile(agentId, nodeId, sid, "rubric.md"); setRubricContent(rubric?.content || ""); } catch (e) {} } // Display coworker.md (Ground Truth) try { - const coworker = await fetchWithAuth(`/agents/${agentId}/cortex/coworker.md?node_id=${found.mesh_node_id}&session_id=${sid}`); + const coworker = await getAgentCortexFile(agentId, nodeId, sid, ".coworker.md"); setCoworkerContent(coworker?.content || ""); } catch (e) {} // Display history.log if (fileExists("history.log")) { try { - const logs = await getAgentCortexFile(agentId, found.mesh_node_id, sid, "history.log"); + const logs = await getAgentCortexFile(agentId, nodeId, sid, "history.log"); if (logs?.content) { try { const parsed = JSON.parse(logs.content); @@ -218,17 +226,37 @@ if (previousStatus !== 'active') { setRunningSeconds(0); setLastTotalConsumption(null); + setCurrentAction(null); + setLastAction(null); + setLastActionDuration(null); + setActionStartTime(0); } interval = setInterval(() => { setRunningSeconds(s => s + 1); }, 1000); + + // Track Action/Phase changes + const rawStatus = agent.evaluation_status || 'Orchestrating task payload...'; + const cleanStatus = rawStatus.toLowerCase().includes('audit') || rawStatus.toLowerCase().includes('worker') + ? `🛡️ Co-Worker Audit: ${rawStatus}` + : `🤖 Main Agent: ${rawStatus}`; + + if (rawStatus !== (currentAction?.raw || '')) { + // Shift current to last + if (currentAction) { + setLastAction(currentAction); + setLastActionDuration(runningSeconds - actionStartTime); + } + setCurrentAction({ display: cleanStatus, raw: rawStatus }); + setActionStartTime(runningSeconds); + } } else if (previousStatus === 'active' && agent?.status !== 'active') { // Captured finished state setLastTotalConsumption(runningSeconds); } setPreviousStatus(agent?.status || 'idle'); return () => clearInterval(interval); - }, [agent?.status, runningSeconds, previousStatus]); + }, [agent?.status, agent?.evaluation_status, runningSeconds, previousStatus, currentAction, actionStartTime]); const [skipEval, setSkipEval] = useState(false); @@ -266,6 +294,7 @@ } await clearSessionHistory(agent.session_id); setChatHistory([]); + setSelectedAuditId(null); fetchData(); } catch (err) { setModalConfig({ title: 'Clear Failed', message: err.message, type: 'error' }); @@ -350,11 +379,19 @@ try { setSavingGroundTruth(true); const sid = agent.session?.sync_workspace_id || agent.session_id; - await fetchWithAuth(`/agents/${agentId}/cortex/coworker.md?node_id=${agent.mesh_node_id}&session_id=${sid}`, { - method: "PUT", - body: { content: coworkerContent } + const nodeId = agent.mesh_node_id || "hub"; + + // Use the general node FS API to write .coworker.md + await fetchWithAuth(`/nodes/${nodeId}/fs/touch?X-User-ID=${userConfig?.id || 'agent_ui'}`, { + method: "POST", + body: { + path: ".coworker.md", + content: coworkerContent, + is_dir: false, + session_id: sid + } }); - setModalConfig({ title: 'Success', message: 'Swarm Ground Truth aligned across all agents.', type: 'success' }); + setModalConfig({ title: 'Success', message: 'Auditor Guidelines synced to node workspace.', type: 'success' }); fetchData(); } catch (err) { setModalConfig({ title: 'Update Failed', message: err.message, type: 'error' }); @@ -546,35 +583,37 @@ {/* Agent Status Indicator Label (M4 Observability) */} {(agent?.status === 'active' || lastTotalConsumption !== null) && ( -
+
{entry.reason || entry.message}
{entry.sub_events?.length > 0 && ( -+
{entry.details}
)}