diff --git a/ai-hub/app/api/dependencies.py b/ai-hub/app/api/dependencies.py index 4946e46..11de447 100644 --- a/ai-hub/app/api/dependencies.py +++ b/ai-hub/app/api/dependencies.py @@ -32,13 +32,15 @@ # HARDENING: In production, X-User-ID must be verified via a shared secret from the proxy from app.config import settings - if settings.SECRET_KEY and settings.SECRET_KEY not in ["dev", "generate-me"]: - if not x_proxy_secret or x_proxy_secret != settings.SECRET_KEY: - logging.warning(f"Invalid X-Proxy-Secret from {x_user_id}. Identity claim rejected.") - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Invalid Proxy Secret. Identity claim rejected." - ) + if settings.SECRET_KEY and settings.SECRET_KEY not in ["dev", "generate-me", "dev-secret-key-1337"]: + # Strict enforcement only if OIDC is disabled or if the secret is provided (to verify it) + if not settings.OIDC_ENABLED or x_proxy_secret: + if not x_proxy_secret or x_proxy_secret != settings.SECRET_KEY: + logging.warning(f"Invalid X-Proxy-Secret from {x_user_id}. Identity claim rejected.") + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Invalid Proxy Secret. Identity claim rejected." + ) user = db.query(models.User).filter(models.User.id == x_user_id).first() if not user: diff --git a/ai-hub/app/api/schemas.py b/ai-hub/app/api/schemas.py index 287a0a7..d5b1473 100644 --- a/ai-hub/app/api/schemas.py +++ b/ai-hub/app/api/schemas.py @@ -296,6 +296,9 @@ # The timestamp for when the message was created. created_at: datetime + # Metadata such as quality audits, tool usage stats, etc. + message_metadata: Optional[dict] = None + # URL to the saved audio file audio_url: Optional[str] = None # Whether audio exists for this message diff --git a/ai-hub/app/core/orchestration/agent_loop.py b/ai-hub/app/core/orchestration/agent_loop.py index c78442a..a387693 100644 --- a/ai-hub/app/core/orchestration/agent_loop.py +++ b/ai-hub/app/core/orchestration/agent_loop.py @@ -171,6 +171,9 @@ current_attempt = 0 final_result = None + if evaluator: + await evaluator.log_event("Execution Initialized", "Agent loop warming up for primary task execution.") + # --- MAIN REWORK LOOP --- loop_start = time.time() # Handle scope for exception reporting while current_attempt <= max_rework_attempts: @@ -185,6 +188,7 @@ final_input_tokens = 0 final_output_tokens = 0 final_answer = "" + last_assistant_msg_id = None instance = db.query(AgentInstance).filter(AgentInstance.id == agent_id).first() instance.last_reasoning = "" @@ -210,6 +214,7 @@ user_service=user_service ): if event.get("type") == "finish": + last_assistant_msg_id = event.get("message_id") final_tool_counts = event.get("tool_counts", {}) final_input_tokens = event.get("input_tokens", 0) final_output_tokens = event.get("output_tokens", 0) @@ -278,7 +283,13 @@ if registry and instance.mesh_node_id: registry.emit(instance.mesh_node_id, "status_update", {"evaluation_status": instance.evaluation_status}) - blind_eval = await evaluator.evaluate_blind(prompt, rubric_content, final_answer) + # transparency context for Auditor + partner_ctx = { + "system_prompt": template.system_prompt_content, + "skills": [s.name for s in (agent_session.skills if agent_session and agent_session.skills else [])] + } + + blind_eval = await evaluator.evaluate_blind(prompt, rubric_content, final_answer, partner_context=partner_ctx) score = blind_eval.get("score", 0) justification = blind_eval.get("justification", "") blind_duration = blind_eval.get("duration", 0) @@ -306,6 +317,28 @@ if len(summary_reason) > 250: summary_reason = summary_reason[:247] + "..." await evaluator.log_round(current_attempt + 1, score, summary_reason, "Final answer passed quality gate.", sub_events=round_sub_events, duration=total_success_duration) + + # PERSISTENCE: Save this audit to the message for historical drill-down + if last_assistant_msg_id: + current_history = [] + try: + cmd_res = evaluator.assistant.dispatch_single(evaluator.mesh_node_id, "cat .cortex/history.log", session_id=evaluator.sync_workspace_id) + current_history = json.loads(cmd_res.get("stdout", "[]")) + except: pass + + db.query(Message).filter(Message.id == last_assistant_msg_id).update({ + "message_metadata": { + "evaluation": { + "rubric": rubric_content, + "feedback": success_feedback, + "history": current_history, + "score": score, + "passed": True + } + } + }) + if not safe_commit(): return + break # Success! # Check Rework Limits @@ -320,6 +353,28 @@ if len(summary_reason) > 250: summary_reason = summary_reason[:247] + "..." await evaluator.log_round(current_attempt + 1, score, summary_reason, "Failed quality gate after max attempts.", sub_events=round_sub_events, duration=total_fail_duration) + + # PERSISTENCE: Save this status to the message for historical drill-down + if last_assistant_msg_id: + current_history = [] + try: + cmd_res = evaluator.assistant.dispatch_single(evaluator.mesh_node_id, "cat .cortex/history.log", session_id=evaluator.sync_workspace_id) + current_history = json.loads(cmd_res.get("stdout", "[]")) + except: pass + + db.query(Message).filter(Message.id == last_assistant_msg_id).update({ + "message_metadata": { + "evaluation": { + "rubric": rubric_content, + "feedback": f"# Evaluation Failed (Max Attempts)\n\n**Score**: {score}/100\n\n**Justification**:\n{justification}", + "history": current_history, + "score": score, + "passed": False + } + } + }) + if not safe_commit(): return + break # No more reworks # Stage Delta (Gap Analysis) @@ -341,7 +396,7 @@ directive_feedback = await evaluator.generate_compaction_summary(prompt, hist_log) round_sub_events.append({"name": "Context compaction", "duration": round(time.time() - delta_start, 2), "timestamp": time.time()}) else: - directive_feedback = await evaluator.evaluate_delta(prompt, rubric_content, justification, hist_log, final_reasoning) + directive_feedback = await evaluator.evaluate_delta(prompt, rubric_content, justification, hist_log, final_reasoning, partner_context=partner_ctx) round_sub_events.append({"name": "Delta analysis", "duration": round(time.time() - delta_start, 2), "timestamp": time.time()}) # M3: Categorization & Duration Metrics @@ -359,6 +414,27 @@ # Log this round with summary and duration await evaluator.log_round(current_attempt + 1, score, summary_reason, directive_feedback, sub_events=round_sub_events, duration=total_round_duration) + # PERSISTENCE: Save this audit to the message for historical drill-down + if last_assistant_msg_id: + current_history = [] + try: + cmd_res = evaluator.assistant.dispatch_single(evaluator.mesh_node_id, "cat .cortex/history.log", session_id=evaluator.sync_workspace_id) + current_history = json.loads(cmd_res.get("stdout", "[]")) + except: pass + + db.query(Message).filter(Message.id == last_assistant_msg_id).update({ + "message_metadata": { + "evaluation": { + "rubric": rubric_content, + "feedback": full_audit_stream, + "history": current_history, + "score": score, + "passed": False + } + } + }) + if not safe_commit(): return + # Trigger next iteration current_prompt = f"### CO-WORKER DIRECTIVE (ATTEMPT {current_attempt + 1})\n\n{directive_feedback}\n\nProceed with rework." current_attempt += 1 @@ -417,6 +493,10 @@ instance.last_reasoning = None if not safe_commit(): return + if evaluator: + total_elapsed = time.time() - loop_start + await evaluator.log_event("Process Completed", f"Lifecycle finished successfully after {current_attempt + 1} rounds.", duration=total_elapsed) + return final_result except Exception as e: diff --git a/ai-hub/app/core/orchestration/harness_evaluator.py b/ai-hub/app/core/orchestration/harness_evaluator.py index 5c71266..2f88e0b 100644 --- a/ai-hub/app/core/orchestration/harness_evaluator.py +++ b/ai-hub/app/core/orchestration/harness_evaluator.py @@ -239,15 +239,28 @@ logger.error(f"[HarnessEvaluator] Rubric generation failed: {e}") return None - async def evaluate_blind(self, initial_prompt: str, rubric_content: str, result_content: str) -> Dict[str, Any]: + async def evaluate_blind(self, initial_prompt: str, rubric_content: str, result_content: str, partner_context: Dict = None) -> Dict[str, Any]: """Stage 2A: The Blind Rating (Absolute Objectivity). Uses tools to inspect results.""" start = time.time() + partner_info = "" + if partner_context: + partner_info = f""" +## PARTNER PROFILE (Main Agent context): +SYSTEM PROMPT: +{partner_context.get('system_prompt', 'N/A')} + +AVAILABLE SKILLS (Tools): +{partner_context.get('skills', 'N/A')} +--- +""" + system_prompt = f"""You are the Co-Worker Evaluator (Blind Auditor). Your goal is to perform a BLIND evaluation of the Main Agent's work. You have NO knowledge of previous rounds or internal reasoning. You only see the goal and the result. Original Request: {initial_prompt} +{partner_info} Current Result: --- @@ -272,7 +285,7 @@ res["duration"] = time.time() - start return res - async def evaluate_delta(self, initial_prompt: str, rubric_content: str, blind_justification: str, history_log: List[Dict[str, Any]], transcript: str) -> str: + async def evaluate_delta(self, initial_prompt: str, rubric_content: str, blind_justification: str, history_log: List[Dict[str, Any]], transcript: str, partner_context: Dict = None) -> str: """Stage 2B: The Delta Analysis. Identifies gaps by comparing result to reasoning transcript.""" start = time.time() @@ -300,10 +313,21 @@ {transcript} --- +## PARTNER PROFILE (Main Agent context): +SYSTEM PROMPT: +{partner_context.get('system_prompt', 'N/A') if partner_context else 'N/A'} + +AVAILABLE SKILLS (Tools): +{partner_context.get('skills', 'N/A') if partner_context else 'N/A'} +--- + MISSION: -1. Identify the 'Delta' (what was intended vs. what was actually committed). -2. Spot 'Gold-Plating' (did they do extra work not asked for?). -3. Format feedback as ACTIONABLE COMMANDS (Directives). +1. Compare the Blind Justification with the Execution Transcript. +2. Identify why the Agent failed to meet the criteria despite its reasoning. +3. Identify the 'Delta' (what was intended vs. what was actually committed). +4. Spot 'Gold-Plating' (did they do extra work not asked for?). +5. Format feedback as ACTIONABLE COMMANDS (Directives). +6. If this is a repeat failure, provide a solution sketch or absolute directive. Example: "Directive: Refactor auth.py:L10 to use settings.API_KEY instead of hardcoded string." Format as Markdown. Start with '# Rework Instructions'.""" diff --git a/ai-hub/app/core/skills/bootstrap.py b/ai-hub/app/core/skills/bootstrap.py index 7e276ca..af5c3fd 100644 --- a/ai-hub/app/core/skills/bootstrap.py +++ b/ai-hub/app/core/skills/bootstrap.py @@ -28,7 +28,7 @@ # but preserve the skill's identity. logger.info(f"Syncing system skill: {skill_def['name']}") existing.description = skill_def.get("description") - existing.skill_type = skill_def.get("skill_type") + existing.skill_type = skill_def.get("skill_type", "local") existing.is_enabled = skill_def.get("is_enabled", True) existing.features = skill_def.get("features", ["swarm_control"]) existing.extra_metadata = skill_def.get("extra_metadata", {}) @@ -41,7 +41,7 @@ target_skill = models.Skill( name=skill_def["name"], description=skill_def.get("description"), - skill_type=skill_def.get("skill_type"), + skill_type=skill_def.get("skill_type", "local"), is_enabled=skill_def.get("is_enabled", True), features=skill_def.get("features", ["swarm_control"]), extra_metadata=skill_def.get("extra_metadata", {}), diff --git a/deployment/test-nodes/docker-compose.test-nodes.yml b/deployment/test-nodes/docker-compose.test-nodes.yml index c9cba88..732f6b9 100644 --- a/deployment/test-nodes/docker-compose.test-nodes.yml +++ b/deployment/test-nodes/docker-compose.test-nodes.yml @@ -5,7 +5,7 @@ services: test-node-1: build: - context: ../../agent-node + context: ./agent-node container_name: cortex-test-1 environment: - AGENT_NODE_ID=test-node-1 @@ -29,7 +29,7 @@ test-node-2: build: - context: ../../agent-node + context: ./agent-node container_name: cortex-test-2 environment: - AGENT_NODE_ID=test-node-2 diff --git a/frontend/src/features/agents/components/AgentDrillDown.js b/frontend/src/features/agents/components/AgentDrillDown.js index e1fedbb..607e53f 100644 --- a/frontend/src/features/agents/components/AgentDrillDown.js +++ b/frontend/src/features/agents/components/AgentDrillDown.js @@ -38,11 +38,16 @@ const [coworkerContent, setCoworkerContent] = useState(""); const [historyLog, setHistoryLog] = useState([]); const [savingGroundTruth, setSavingGroundTruth] = useState(false); + const [selectedAuditId, setSelectedAuditId] = useState(null); // null means 'Live State' from node // Monitoring & Timer States const [runningSeconds, setRunningSeconds] = useState(0); const [lastTotalConsumption, setLastTotalConsumption] = useState(null); const [previousStatus, setPreviousStatus] = useState('idle'); + const [currentAction, setCurrentAction] = useState(null); + const [lastAction, setLastAction] = useState(null); + const [lastActionDuration, setLastActionDuration] = useState(null); + const [actionStartTime, setActionStartTime] = useState(0); // Helper: Convert cron expression to human-readable text const describeCron = (expr) => { @@ -126,7 +131,8 @@ sender: m.sender, timestamp: m.created_at, id: m.id, - tool_calls: m.tool_calls + tool_calls: m.tool_calls, + message_metadata: m.message_metadata })); setChatHistory(formatted); @@ -150,10 +156,12 @@ } catch(e) {} // Fetch Evaluation Hub data (.cortex/) - if (found.mesh_node_id && (found.session?.sync_workspace_id || found.session_id)) { + const sid = found.session?.sync_workspace_id || found.session_id; + const nodeId = found.mesh_node_id || "hub"; + + if (sid) { try { - const sid = found.session?.sync_workspace_id || found.session_id; - const cFilesListing = await getAgentCortexFiles(agentId, found.mesh_node_id, sid); + const cFilesListing = await getAgentCortexFiles(agentId, nodeId, sid); const files = cFilesListing.files || []; setCortexFiles(files); @@ -162,7 +170,7 @@ // Stream feedback.md if (fileExists("feedback.md")) { try { - const feedback = await getAgentCortexFile(agentId, found.mesh_node_id, sid, "feedback.md"); + const feedback = await getAgentCortexFile(agentId, nodeId, sid, "feedback.md"); setFeedbackContent(feedback?.content || ""); } catch (e) {} } @@ -170,21 +178,21 @@ // Display rubric.md if (fileExists("rubric.md")) { try { - const rubric = await getAgentCortexFile(agentId, found.mesh_node_id, sid, "rubric.md"); + const rubric = await getAgentCortexFile(agentId, nodeId, sid, "rubric.md"); setRubricContent(rubric?.content || ""); } catch (e) {} } // Display coworker.md (Ground Truth) try { - const coworker = await fetchWithAuth(`/agents/${agentId}/cortex/coworker.md?node_id=${found.mesh_node_id}&session_id=${sid}`); + const coworker = await getAgentCortexFile(agentId, nodeId, sid, ".coworker.md"); setCoworkerContent(coworker?.content || ""); } catch (e) {} // Display history.log if (fileExists("history.log")) { try { - const logs = await getAgentCortexFile(agentId, found.mesh_node_id, sid, "history.log"); + const logs = await getAgentCortexFile(agentId, nodeId, sid, "history.log"); if (logs?.content) { try { const parsed = JSON.parse(logs.content); @@ -218,17 +226,37 @@ if (previousStatus !== 'active') { setRunningSeconds(0); setLastTotalConsumption(null); + setCurrentAction(null); + setLastAction(null); + setLastActionDuration(null); + setActionStartTime(0); } interval = setInterval(() => { setRunningSeconds(s => s + 1); }, 1000); + + // Track Action/Phase changes + const rawStatus = agent.evaluation_status || 'Orchestrating task payload...'; + const cleanStatus = rawStatus.toLowerCase().includes('audit') || rawStatus.toLowerCase().includes('worker') + ? `🛡️ Co-Worker Audit: ${rawStatus}` + : `🤖 Main Agent: ${rawStatus}`; + + if (rawStatus !== (currentAction?.raw || '')) { + // Shift current to last + if (currentAction) { + setLastAction(currentAction); + setLastActionDuration(runningSeconds - actionStartTime); + } + setCurrentAction({ display: cleanStatus, raw: rawStatus }); + setActionStartTime(runningSeconds); + } } else if (previousStatus === 'active' && agent?.status !== 'active') { // Captured finished state setLastTotalConsumption(runningSeconds); } setPreviousStatus(agent?.status || 'idle'); return () => clearInterval(interval); - }, [agent?.status, runningSeconds, previousStatus]); + }, [agent?.status, agent?.evaluation_status, runningSeconds, previousStatus, currentAction, actionStartTime]); const [skipEval, setSkipEval] = useState(false); @@ -266,6 +294,7 @@ } await clearSessionHistory(agent.session_id); setChatHistory([]); + setSelectedAuditId(null); fetchData(); } catch (err) { setModalConfig({ title: 'Clear Failed', message: err.message, type: 'error' }); @@ -350,11 +379,19 @@ try { setSavingGroundTruth(true); const sid = agent.session?.sync_workspace_id || agent.session_id; - await fetchWithAuth(`/agents/${agentId}/cortex/coworker.md?node_id=${agent.mesh_node_id}&session_id=${sid}`, { - method: "PUT", - body: { content: coworkerContent } + const nodeId = agent.mesh_node_id || "hub"; + + // Use the general node FS API to write .coworker.md + await fetchWithAuth(`/nodes/${nodeId}/fs/touch?X-User-ID=${userConfig?.id || 'agent_ui'}`, { + method: "POST", + body: { + path: ".coworker.md", + content: coworkerContent, + is_dir: false, + session_id: sid + } }); - setModalConfig({ title: 'Success', message: 'Swarm Ground Truth aligned across all agents.', type: 'success' }); + setModalConfig({ title: 'Success', message: 'Auditor Guidelines synced to node workspace.', type: 'success' }); fetchData(); } catch (err) { setModalConfig({ title: 'Update Failed', message: err.message, type: 'error' }); @@ -546,35 +583,37 @@ {/* Agent Status Indicator Label (M4 Observability) */} {(agent?.status === 'active' || lastTotalConsumption !== null) && ( -
-
-
-
+
+
+
+
{agent?.status === 'active' &&
}
-
- - {agent?.status === 'active' ? "Internal Trace:" : "Last Action:"} - - +
+ Current Action: + {agent?.status === 'active' - ? (agent.evaluation_status - ? (agent.evaluation_status.toLowerCase().includes('audit') || agent.evaluation_status.toLowerCase().includes('worker') - ? `🛡️ Co-Worker Audit: ${agent.evaluation_status}` - : `🤖 Main Agent: ${agent.evaluation_status}`) - : '🤖 Main Agent: Orchestrating task payload...') - : '🏁 Task execution cycle completed successfully.'} + ? (agent.evaluation_status || 'Orchestrating Workflow...') + : 'Process Lifecycle Ended'}
-
-
+
+ +
+
+
+ Last Action: + + {lastAction?.display ? lastAction.display.split(':').pop().trim() : "Initialized"} + + {lastActionDuration !== null && ( + + {lastActionDuration}s + + )} +
+
+
{agent?.status === 'active' ? (
@@ -1112,45 +1151,113 @@ -
+
)} - {activeTab === 'evaluation' && ( -
- {/* Swarm Ground Truth Editor (Aligned Discovery) */} -
-
- - - Swarm Ground Truth (Aligned Knowledge Base) - -
- .coworker.md + {activeTab === 'evaluation' && (() => { + const messagesWithAudit = chatHistory.filter(m => m.message_metadata?.evaluation && !m.isUser); + const selectedMessage = selectedAuditId ? chatHistory.find(m => m.id === selectedAuditId) : null; + + const displayFeedback = selectedMessage ? selectedMessage.message_metadata.evaluation.feedback : feedbackContent; + const displayRubric = selectedMessage ? selectedMessage.message_metadata.evaluation.rubric : rubricContent; + const displayHistory = selectedMessage ? selectedMessage.message_metadata.evaluation.history : historyLog; + + return ( +
+ {/* Inspection Controls */} +
+
+ Audit Mode +
+
-
+ + {selectedAuditId && ( +
+ Select Historical Message + +
+ )} + +
+ Terminal Inspector +
+ {selectedAuditId ? `SNAPSHOT: MSID-${selectedAuditId}` : "LIVE: .CORTEX/"} +
+
+
+ + {/* Auditor Guidelines Editor (Oversight Policy) */} +
+
+
+ + + Auditor Guidelines + + Ground Truth Policy Layer +
+
+ .coworker.md + +
+
+
-
- - This context is injected into both the Main Agent and Co-Worker Auditor. - - - Mirrors ClaudeCode Discovery Style 🛰️ +
+
+
+
A
+
W
+
+ + Oversight Policy: Shared between Auditor & Worker. + +
+ + Autonomous Oversight 🛰️
@@ -1158,106 +1265,119 @@
{/* Evaluation Strategy (Rubric) */} -
-
- - 📜 Quality Rubric +
+
+ + + Quality Rubric - rubric.md + rubric.md
-
- {rubricContent || No rubric configuration found for this agent instance.} +
+ {displayRubric || No rubric configuration found for this agent instance.}
{/* Live feedback loop (Feedback) */} -
-
- -
- Co-Worker Feedback Stream +
+
+ +
+ {selectedAuditId ? "Audit Snapshot" : "Audit Stream"}
- feedback.md + {selectedAuditId ? `MSID-${selectedAuditId}` : "feedback.md"}
-
- {feedbackContent || Waiting for next evaluation cycle to populate feedback...} +
+ {displayFeedback || Waiting for next evaluation cycle to populate feedback...}
{/* Rework History Viewer */} -
-
- - 🕒 Rework History Timeline +
+
+ + + History Timeline
- history.log + history.log
-
-
- {(historyLog && historyLog.length > 0) ? [...historyLog].reverse().map((entry, idx) => ( -
+
+
+ {(displayHistory && displayHistory.length > 0) ? [...displayHistory].reverse().map((entry, idx) => ( +
-
-
= (editConfig?.rework_threshold || 80) ? 'bg-emerald-500 shadow-[0_0_8px_rgba(16,185,129,0.5)]' : 'bg-amber-500 shadow-[0_0_8px_rgba(245,158,11,0.5)]') : 'bg-indigo-300 dark:bg-indigo-900/50'}`}>
- - {entry.type === 'attempt' ? `Attempt ${entry.round}` : entry.name || 'Event'} - {entry.duration && ({entry.duration}s)} +
+
= (editConfig?.rework_threshold || 80) ? 'bg-emerald-500 shadow-[0_0_10px_rgba(16,185,129,0.6)]' : 'bg-amber-500 shadow-[0_0_10px_rgba(245,158,11,0.6)]') : 'bg-indigo-500/50'}`}>
+ + {entry.type === 'attempt' ? `Attempt Round ${entry.round}` : entry.name || 'Event'} + {entry.duration && took {entry.duration}s}
-
- - {entry.timestamp ? new Date(entry.timestamp * (entry.timestamp < 2000000000 ? 1000 : 1)).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' }) : 'Recent'} +
+ + {entry.timestamp ? new Date(entry.timestamp * (entry.timestamp < 2000000000 ? 1000 : 1)).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' }) : 'Pending'} {entry.score !== undefined && ( - = (editConfig?.rework_threshold || 80) ? 'bg-emerald-500/10 text-emerald-500' : 'bg-amber-500/10 text-amber-500'}`}> - Score: {entry.score}% - +
= (editConfig?.rework_threshold || 80) ? 'bg-emerald-500/10 text-emerald-500 border border-emerald-500/20' : 'bg-amber-500/10 text-amber-500 border border-amber-500/20'}`}> + {entry.score}% PRECISION +
)}
{entry.type === 'attempt' ? ( - <> -

+

+

{entry.reason || entry.message}

{entry.sub_events?.length > 0 && ( -
+
{entry.sub_events.map((sub, sidx) => ( -
-
- {sub.name} {sub.duration}s +
+
+ {sub.name} + {sub.duration}ₛ
))}
)} - +
) : ( -

+

{entry.details}

)}
)) : ( -
- No autonomous activity recorded yet. +
+ + No activity recorded
)}
+ {displayHistory && displayHistory.length > 0 && ( +
+ Aggregate Time Cost + + {displayHistory.reduce((acc, entry) => acc + (entry.duration || 0), 0).toFixed(2)}s + +
+ )}
- )} + ); + })()} {activeTab === 'metrics' && (
diff --git a/frontend/src/features/chat/components/ChatWindow.js b/frontend/src/features/chat/components/ChatWindow.js index 2d59cd9..a7c194a 100644 --- a/frontend/src/features/chat/components/ChatWindow.js +++ b/frontend/src/features/chat/components/ChatWindow.js @@ -109,6 +109,9 @@ return null; } + // Use per-message metadata if available; for the latest assistant message, fallback to session-wide 'latest' metadata + const effectiveEval = message.message_metadata?.evaluation || evaluationMetadata; + return (
{/* Status indicator moved to top/bottom for better visibility */} @@ -145,59 +148,50 @@
{/* Per-Answer Evaluation Metadata (Area 4: Quality Rubric / Feedback / History) */} - {!message.isUser && evaluationMetadata && (evaluationMetadata.rubric || evaluationMetadata.feedback || (evaluationMetadata.history && evaluationMetadata.history.length > 0)) && ( + {!message.isUser && effectiveEval && (effectiveEval.rubric || effectiveEval.feedback || (effectiveEval.history && effectiveEval.history.length > 0)) && (
- +
📜 View Quality Audit & Evaluation
- e.stopPropagation()} - > - OPEN EVALUATION HUB ↗ -
{/* Rubric snippet */} - {evaluationMetadata.rubric && ( + {effectiveEval.rubric && (
📝 Quality Rubric
- {evaluationMetadata.rubric} + {effectiveEval.rubric}
)} {/* Feedback snippet */} - {evaluationMetadata.feedback && ( + {effectiveEval.feedback && (
🤖 Co-Worker Feedback
- {evaluationMetadata.feedback} + {effectiveEval.feedback}
)}
{/* History Timeline snippet */} - {evaluationMetadata.history && evaluationMetadata.history.length > 0 && ( + {effectiveEval.history && effectiveEval.history.length > 0 && (
🕒 Rework History Timeline
- {evaluationMetadata.history.map((h, i) => ( + {effectiveEval.history.map((h, i) => (
@@ -375,61 +369,67 @@ style={{ maxHeight: maxHeight, overflowY: 'auto' }} className="px-4 md:px-6 py-6 space-y-6 bg-transparent chat-history-container min-h-full" > - {chatHistory.map((message, index) => { - const isLastMessage = index === chatHistory.length - 1; - const isSystemLog = message.sender === "system" && (message.text?.includes("Co-Worker") || message.text?.includes("Attempt")); - const shouldCollapse = (autoCollapse && !isLastMessage && !message.isUser && !expandedIndices[index]) || (isSystemLog && !expandedIndices[index]); + {(() => { + const lastAssistantIndex = chatHistory.slice().reverse().findIndex(m => m.sender === 'assistant' || m.sender === 'assistant'); // Logic for assistant + const actualLastAssistantIndex = lastAssistantIndex !== -1 ? chatHistory.length - 1 - lastAssistantIndex : -1; - return ( -
- {shouldCollapse ? ( - - ) : ( -
-
- { - setActivePlayingId(id); - if (id && onAudioPlay) { - onAudioPlay(); // Notify parent to stop streaming (to prevent overlap) - } - }} - evaluationMetadata={evaluationMetadata} - /> + return chatHistory.map((message, index) => { + const isLastMessage = index === chatHistory.length - 1; + const isLastAssistantMessage = index === actualLastAssistantIndex; + const isSystemLog = message.sender === "system" && (message.text?.includes("Co-Worker") || message.text?.includes("Attempt")); + const shouldCollapse = (autoCollapse && !isLastMessage && !message.isUser && !expandedIndices[index]) || (isSystemLog && !expandedIndices[index]); + + return ( +
+ {shouldCollapse ? ( + + ) : ( +
+
+ { + setActivePlayingId(id); + if (id && onAudioPlay) { + onAudioPlay(); // Notify parent to stop streaming (to prevent overlap) + } + }} + evaluationMetadata={isLastAssistantMessage ? evaluationMetadata : null} + /> +
+ {autoCollapse && !isLastMessage && !message.isUser && expandedIndices[index] && ( + + )}
- {autoCollapse && !isLastMessage && !message.isUser && expandedIndices[index] && ( - - )} -
- )} -
- ); - })} + )} +
+ ); + }) + })()}
); }; diff --git a/frontend/src/services/api/agentService.js b/frontend/src/services/api/agentService.js index 85b74bd..8cbb1f5 100644 --- a/frontend/src/services/api/agentService.js +++ b/frontend/src/services/api/agentService.js @@ -74,6 +74,8 @@ * [Evaluation Hub] Reading specific file from .cortex/ (e.g. feedback.md, history.log). */ export const getAgentCortexFile = async (agentId, nodeId, sessionId, fileName) => { - const params = new URLSearchParams({ path: `.cortex/${fileName}`, session_id: sessionId }); + // Distinguish between root guidelines and .cortex/ artifacts + const filePath = fileName.startsWith('.') ? fileName : `.cortex/${fileName}`; + const params = new URLSearchParams({ path: filePath, session_id: sessionId }); return await fetchWithAuth(`/nodes/${nodeId}/fs/cat?${params.toString()}`); };