diff --git a/ai-hub/app/api/routes/sessions.py b/ai-hub/app/api/routes/sessions.py index ee7276c..127477e 100644 --- a/ai-hub/app/api/routes/sessions.py +++ b/ai-hub/app/api/routes/sessions.py @@ -83,24 +83,38 @@ raise HTTPException(status_code=500, detail=f"Failed to create session: {e}") - @router.post("/{session_id}/chat", response_model=schemas.ChatResponse, summary="Send a Message in a Session") + @router.post("/{session_id}/chat", summary="Send a Message in a Session (Streaming)") async def chat_in_session( session_id: int, request: schemas.ChatRequest, db: Session = Depends(get_db) ): - try: - response_text, provider_used, message_id = await services.rag_service.chat_with_rag( - db=db, - session_id=session_id, - prompt=request.prompt, - provider_name=request.provider_name, - load_faiss_retriever=request.load_faiss_retriever, - user_service=services.user_service - ) - return schemas.ChatResponse(answer=response_text, provider_used=provider_used, message_id=message_id) - except Exception as e: - raise HTTPException(status_code=500, detail=f"An error occurred during chat: {e}") + """ + Streams AI response using Server-Sent Events (SSE). + Yields tokens, reasoning, and tool executions in real-time. + """ + from fastapi.responses import StreamingResponse + import json + + async def event_generator(): + try: + # chat_with_rag is now an AsyncGenerator + async for event in services.rag_service.chat_with_rag( + db=db, + session_id=session_id, + prompt=request.prompt, + provider_name=request.provider_name, + load_faiss_retriever=request.load_faiss_retriever, + user_service=services.user_service + ): + yield f"data: {json.dumps(event)}\n\n" + except Exception as e: + import logging + logging.exception("Error in chat streaming") + yield f"data: {json.dumps({'type': 'error', 'content': str(e)})}\n\n" + + return StreamingResponse(event_generator(), media_type="text/event-stream") + @router.get("/{session_id}/messages", response_model=schemas.MessageHistoryResponse, summary="Get Session Chat History") def get_session_messages(session_id: int, db: Session = Depends(get_db)): diff --git a/ai-hub/app/api/schemas.py b/ai-hub/app/api/schemas.py index 1f6bac5..3a979e5 100644 --- a/ai-hub/app/api/schemas.py +++ b/ai-hub/app/api/schemas.py @@ -230,8 +230,11 @@ sender: Literal["user", "assistant"] # The text content of the message. content: str + # The progressive reasoning or 'thinking' step for models that support it + reasoning_content: Optional[str] = None # The timestamp for when the message was created. created_at: datetime + # URL to the saved audio file audio_url: Optional[str] = None # Whether audio exists for this message diff --git a/ai-hub/app/core/pipelines/rag_pipeline.py b/ai-hub/app/core/pipelines/rag_pipeline.py index 9dcfaee..445adad 100644 --- a/ai-hub/app/core/pipelines/rag_pipeline.py +++ b/ai-hub/app/core/pipelines/rag_pipeline.py @@ -69,7 +69,7 @@ user_id: Optional[str] = None, feature_name: str = "chat", prompt_slug: str = "rag-pipeline" - ) -> str: + ): logging.debug(f"[RagPipeline.forward] Received question: '{question}'") if not llm_provider: @@ -95,7 +95,6 @@ ) # 1. Prepare initial messages - # We put the 'question' as the user message and use 'system_prompt' for instructions/context messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": question} @@ -103,13 +102,12 @@ # 2. Agentic Tool Loop (Max 5 turns to prevent infinite loops) for turn in range(5): - request_kwargs = {} + request_kwargs = {"stream": True} if tools: request_kwargs["tools"] = tools request_kwargs["tool_choice"] = "auto" model = getattr(llm_provider, "model_name", "unknown") - # Safely calculate total characters in messages, handling None content msg_lens = [] for m in messages: content = "" @@ -122,59 +120,97 @@ total_chars = sum(msg_lens) tool_count = len(tools) if tools else 0 - logging.info(f"[RagPipeline] Turn {turn+1} starting. Model: {model}, Messages: {len(messages)}, Total Chars: {total_chars}, Tools: {tool_count}") - # Log the specific turn metadata for diagnostics - logging.debug(f"[RagPipeline] Turn {turn+1} Payload Metadata: {messages}") + logging.info(f"[RagPipeline] Turn {turn+1} starting (STREAMING). Model: {model}, Messages: {len(messages)}, Total Chars: {total_chars}") + # LiteLLM streaming call prediction = await llm_provider.acompletion(messages=messages, **request_kwargs) - choices = getattr(prediction, "choices", None) - if not choices or len(choices) == 0: - finish_reason = getattr(prediction, "finish_reason", "unknown") - if choices is not None and len(choices) == 0: - # Some providers return empty list for safety filters - logging.warning(f"[RagPipeline.forward] LLM ({model}) returned 0 choices. Turn: {turn+1}. Filter/Safety Trigger likely.") + accumulated_content = "" + accumulated_reasoning = "" + tool_calls_map = {} # index -> tc object + + async for chunk in prediction: + if not chunk.choices: continue + delta = chunk.choices[0].delta - logging.error(f"[RagPipeline.forward] LLM ({model}) failed at Turn {turn+1}. Reason: {finish_reason}. Full Response: {prediction}") - return (f"The AI provider ({model}) returned an empty response (Turn {turn+1}). " - f"Reason: {finish_reason}. Context: {total_chars} chars, {tool_count} tools. " - "This is often a safety filter blocking the prompt.") + # A. Handle Reasoning (Thinking) + # Some models use 'reasoning_content' (OpenAI-compatible / DeepSeek) + reasoning = getattr(delta, "reasoning_content", None) or delta.get("reasoning_content") + if reasoning: + accumulated_reasoning += reasoning + yield {"type": "reasoning", "content": reasoning} + + # B. Handle Content + content = getattr(delta, "content", None) or delta.get("content") + if content: + accumulated_content += content + yield {"type": "content", "content": content} + + # C. Handle Tool Calls + tool_calls = getattr(delta, "tool_calls", None) or delta.get("tool_calls") + if tool_calls: + for tc_delta in tool_calls: + idx = tc_delta.index + if idx not in tool_calls_map: + tool_calls_map[idx] = tc_delta + else: + # Accumulate arguments + if tc_delta.function.arguments: + tool_calls_map[idx].function.arguments += tc_delta.function.arguments - message = prediction.choices[0].message - - # If no tool calls, we are done - if not getattr(message, "tool_calls", None): - raw_response = message.content or "" - if self.response_postprocessor: - return self.response_postprocessor(raw_response) - return raw_response + # Process completed turn + if not tool_calls_map: + # If no tools, this is the final answer for this forward pass. + # However, we might have accumulated content/reasoning. + # We stop the loop and return. + return - # Process tool calls - messages.append(message) # Add assistant message with tool_calls + # Reconstruct the tool call list and message object for the next turn + processed_tool_calls = list(tool_calls_map.values()) - for tool_call in message.tool_calls: - func_name = tool_call.function.name + # Format as an OpenAI-style message for the history + # But await completion returns thin models, we need a serializable dict/object + assistant_msg = { + "role": "assistant", + "content": accumulated_content or None, + "tool_calls": processed_tool_calls + } + if accumulated_reasoning: + assistant_msg["reasoning_content"] = accumulated_reasoning + + messages.append(assistant_msg) + + for tc in processed_tool_calls: + func_name = tc.function.name func_args = {} try: import json - func_args = json.loads(tool_call.function.arguments) + func_args = json.loads(tc.function.arguments) except: pass + yield {"type": "status", "content": f"AI decided to use tool: {func_name}"} logging.info(f"[🔧] Agent calling tool: {func_name} with {func_args}") if tool_service: + # Notify UI about tool execution start + yield {"type": "tool_start", "name": func_name, "args": func_args} + result = await tool_service.call_tool(func_name, func_args, db=db, user_id=user_id) else: result = {"success": False, "error": "Tool service not available"} + # Stream the result back so UI can see "behind the scenes" + yield {"type": "tool_result", "name": func_name, "result": result} + messages.append({ "role": "tool", - "tool_call_id": tool_call.id, + "tool_call_id": tc.id, "name": func_name, - "content": str(result) + "content": json.dumps(result) if isinstance(result, dict) else str(result) }) - return "Agent loop reached maximum turns without a final response." + yield {"type": "error", "content": "Agent loop reached maximum turns without a final response."} + def _build_prompt(self, context, history, question): return f"""Generate a natural and context-aware answer to the user's question using the provided knowledge and conversation history. diff --git a/ai-hub/app/core/services/rag.py b/ai-hub/app/core/services/rag.py index 949baf2..a58d6c0 100644 --- a/ai-hub/app/core/services/rag.py +++ b/ai-hub/app/core/services/rag.py @@ -28,9 +28,10 @@ load_faiss_retriever: bool = False, user_service = None, user_id: Optional[str] = None - ) -> Tuple[str, str, int]: + ): """ - Processes a user prompt within a session, saves the chat history, and returns a response. + Processes a user prompt within a session, yields events in real-time, + and saves the chat history at the end. """ session = db.query(models.Session).options( joinedload(models.Session.messages) @@ -49,19 +50,18 @@ if session.title in (None, "New Chat Session", ""): session.title = prompt[:60].strip() + ("..." if len(prompt) > 60 else "") - # Keep provider_name in sync with the model actually being used + # Keep provider_name in sync if session.provider_name != provider_name: session.provider_name = provider_name db.commit() - # Resolve provider: User Prefs > System Settings + # Resolve provider llm_prefs = {} user = session.user if user and user.preferences: llm_prefs = user.preferences.get("llm", {}).get("providers", {}).get(provider_name, {}) - # System Settings Fallback if (not llm_prefs or not llm_prefs.get("api_key") or "*" in str(llm_prefs.get("api_key"))) and user_service: system_prefs = user_service.get_system_settings(db) system_provider_prefs = system_prefs.get("llm", {}).get("providers", {}).get(provider_name, {}) @@ -73,7 +73,6 @@ api_key_override = llm_prefs.get("api_key") model_name_override = llm_prefs.get("model", "") - # Get the appropriate LLM provider with all extra prefs passed as kwargs kwargs = {k: v for k, v in llm_prefs.items() if k not in ["api_key", "model"]} llm_provider = get_llm_provider( provider_name, @@ -82,23 +81,17 @@ **kwargs ) - # Configure retrievers for the pipeline context_chunks = [] if load_faiss_retriever: if self.faiss_retriever: - context_chunks.extend(self.faiss_retriever.retrieve_context(query=prompt, db=db)) # Ensure FAISS index is loaded - else: - print("Warning: FaissDBRetriever requested but not available. Proceeding without it.") + context_chunks.extend(self.faiss_retriever.retrieve_context(query=prompt, db=db)) rag_pipeline = RagPipeline() - # Discover available tools (Skills) for the current user - # Filter by session.feature_name (e.g. 'chat' or 'voice') tools = [] if self.tool_service: tools = self.tool_service.get_available_tools(db, session.user_id, feature=session.feature_name) - # Gather information about attached nodes for the system prompt mesh_context = "" if session.attached_node_ids: nodes = db.query(models.AgentNode).filter(models.AgentNode.node_id.in_(session.attached_node_ids)).all() @@ -110,7 +103,6 @@ mesh_context += f" Description: {node.description or 'No description provided.'}\n" mesh_context += f" Status: {node.last_status}\n" - # M6: Sandbox Configuration - Expose permissions to AI shell_config = (node.skill_config or {}).get("shell", {}) if shell_config.get("enabled"): sandbox = shell_config.get("sandbox", {}) @@ -124,33 +116,31 @@ elif mode == "PERMISSIVE": mesh_context += f" AI Restricted Commands (Blacklist): {', '.join(denied) if denied else 'None'}\n" - # AI advice if mode == "STRICT" and not allowed: mesh_context += " ⚠️ Warning: All shell commands are currently blocked by sandbox policy.\n" - # AI Visibility: Include recent terminal history - if user_service and hasattr(user_service, "node_registry_service") or True: # Use registry if available - # We try to get the registry from the user_service or a global if needed - # But simpler: we passed it in (optional arg) - registry = getattr(self, "node_registry_service", None) - if not registry and user_service: - registry = getattr(user_service, "node_registry_service", None) - - if registry: - live = registry.get_node(node.node_id) - if live and live.terminal_history: - # Show last 40 lines (approx) to avoid context bloat - history = live.terminal_history[-40:] - mesh_context += " Recent Terminal Output:\n" - mesh_context += " ```\n" - for line in history: - mesh_context += f" {line}" - if not history[-1].endswith('\n'): mesh_context += "\n" - mesh_context += " ```\n" - + # AI Visibility: Recent terminal history + registry = getattr(self, "node_registry_service", None) + if not registry and user_service: + registry = getattr(user_service, "node_registry_service", None) + + if registry: + live = registry.get_node(node.node_id) + if live and live.terminal_history: + history = live.terminal_history[-40:] + mesh_context += " Recent Terminal Output:\n" + mesh_context += " ```\n" + for line in history: mesh_context += f" {line}" + if not history[-1].endswith('\n'): mesh_context += "\n" + mesh_context += " ```\n" mesh_context += "\n" - answer_text = await rag_pipeline.forward( + # Accumulators for the DB save at the end + full_answer = "" + full_reasoning = "" + + # Stream from pipeline + async for event in rag_pipeline.forward( question=prompt, history=session.messages, context_chunks = context_chunks, @@ -163,15 +153,38 @@ user_id = user_id or session.user_id, feature_name = session.feature_name, prompt_slug = "rag-pipeline" + ): + if event["type"] == "content": + full_answer += event["content"] + elif event["type"] == "reasoning": + full_reasoning += event["content"] + + # Forward the event to the API stream + yield event + + # Save assistant's response to DB + assistant_message = models.Message( + session_id=session_id, + sender="assistant", + content=full_answer, + # We assume your models.Message might have these or we just save content ) - - # Save assistant's response - assistant_message = models.Message(session_id=session_id, sender="assistant", content=answer_text) + # Optional: if model supports reasoning_content field + if full_reasoning and hasattr(assistant_message, "reasoning_content"): + assistant_message.reasoning_content = full_reasoning + db.add(assistant_message) db.commit() db.refresh(assistant_message) - return answer_text, provider_name, assistant_message.id + # Yield a final finish event with metadata + yield { + "type": "finish", + "message_id": assistant_message.id, + "provider": provider_name, + "full_answer": full_answer + } + def get_message_history(self, db: Session, session_id: int) -> List[models.Message]: """ diff --git a/ai-hub/app/db/migrate.py b/ai-hub/app/db/migrate.py index 562b0ea..07cef65 100644 --- a/ai-hub/app/db/migrate.py +++ b/ai-hub/app/db/migrate.py @@ -24,8 +24,10 @@ required_columns = [ ("audio_path", "TEXT"), ("model_response_time", "INTEGER"), - ("token_count", "INTEGER") + ("token_count", "INTEGER"), + ("reasoning_content", "TEXT") ] + for col_name, col_type in required_columns: if col_name not in columns: diff --git a/ai-hub/app/db/models.py b/ai-hub/app/db/models.py index d565feb..b780b58 100644 --- a/ai-hub/app/db/models.py +++ b/ai-hub/app/db/models.py @@ -141,10 +141,13 @@ # A JSON field to store unstructured metadata about the message, such as tool calls. # This column has been renamed from 'metadata' to avoid a conflict. message_metadata = Column(JSON, nullable=True) + # The progressive reasoning or 'thinking' step for models that support it (e.g. DeepSeek R1) + reasoning_content = Column(Text, nullable=True) # Path to the generated audio file for this message, if any. audio_path = Column(String, nullable=True) + # Relationship back to the parent Session. # This allows us to access the parent Session object from a Message object. session = relationship("Session", back_populates="messages") diff --git a/ui/client-app/src/components/ChatWindow.css b/ui/client-app/src/components/ChatWindow.css index b780266..8b923b7 100644 --- a/ui/client-app/src/components/ChatWindow.css +++ b/ui/client-app/src/components/ChatWindow.css @@ -1,139 +1,105 @@ -/* Scoped to .assistant-message */ +/* Modern AI Tool Styles */ +:root { + --user-bubble-bg: linear-gradient(135deg, #6366f1 0%, #4338ca 100%); + --assistant-bubble-bg: rgba(255, 255, 255, 0.05); + --reasoning-bg: rgba(99, 102, 241, 0.03); + --border-subtle: rgba(255, 255, 255, 0.1); +} +.dark { + --assistant-bubble-bg: rgba(31, 41, 55, 0.5); + --border-subtle: rgba(255, 255, 255, 0.08); +} + +.assistant-message { + background: var(--assistant-bubble-bg) !important; + backdrop-filter: blur(8px); + border: 1px solid var(--border-subtle) !important; + border-radius: 1.25rem !important; + font-family: 'Inter', sans-serif; + animation: slideInUp 0.3s ease-out; +} + +.user-message-container { + background: var(--user-bubble-bg) !important; + border-radius: 1.25rem !important; + font-family: 'Inter', sans-serif; + animation: slideInUp 0.3s ease-out; +} + +@keyframes slideInUp { + from { + opacity: 0; + transform: translateY(10px) scale(0.98); + } + + to { + opacity: 1; + transform: translateY(0) scale(1); + } +} + +.thought-panel { + background: var(--reasoning-bg); + border-left: 3px solid #6366f1; + border-radius: 0 0.75rem 0.75rem 0; + margin: 0.5rem 0; +} + +.status-chip { + padding: 0.25rem 0.75rem; + border-radius: 9999px; + background: rgba(99, 102, 241, 0.1); + border: 1px solid rgba(99, 102, 241, 0.2); + display: inline-flex; + align-items: center; + gap: 0.5rem; +} + +/* Scoped to .assistant-message */ .assistant-message code { - color: #333; - font-size: 100%; - font-weight: bold; - background-color: #f4f4f4; - border-radius: 0.25rem; - padding: 0.125rem 0.25rem; - font-family: monospace; + color: #818cf8; + font-size: 90%; + background-color: rgba(99, 102, 241, 0.1); + border-radius: 4px; + padding: 2px 4px; } .assistant-message pre { - font-family: 'Courier New', monospace; - background-color: #f4f4f4; - padding: 1em; - border-radius: 5px; - border: 1px solid #ddd; - overflow-x: auto; - white-space: pre-wrap; - word-wrap: break-word; - line-height: 1.5; - color: #333; + background-color: #0f172a; + border: 1px solid rgba(255, 255, 255, 0.1); + color: #e2e8f0; + padding: 1.25rem; + border-radius: 0.75rem; } -.assistant-message .p-4 { - padding: 1rem; +.thinking-dots::after { + content: ''; + animation: dots 1.5s steps(5, end) infinite; } -.assistant-message .rounded-lg { - border-radius: 0.5rem; -} +@keyframes dots { -.assistant-message .shadow-md { - box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); -} + 0%, + 20% { + content: ''; + } -.assistant-message .w-full { - width: 100%; -} + 40% { + content: '.'; + } -.assistant-message .bg-gray-200 { - background-color: #e5e7eb; -} + 60% { + content: '..'; + } -.assistant-message .text-gray-900 { - color: #111827; -} - -.assistant-message .mb-2 { - margin-bottom: 0.5rem; -} - -.assistant-message .text-sm { - font-size: 0.875rem; -} - -.assistant-message .font-semibold { - font-weight: 600; -} - -.assistant-message .text-indigo-700 { - color: #4338ca; -} - -.assistant-message .hover\:text-indigo-800:hover { - color: #3730a3; -} - -.assistant-message .focus\:outline-none:focus { - outline: 0; -} - -.assistant-message .mt-2 { - margin-top: 0.5rem; -} - -.assistant-message .text-xs { - font-size: 0.75rem; -} - -.assistant-message .transition-max-h { - transition-property: max-height; -} - -.assistant-message .duration-500 { - transition-duration: 500ms; -} - -.assistant-message .ease-in-out { - transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1); -} - -.assistant-message .overflow-hidden { - overflow: hidden; -} - -.assistant-message .max-h-0 { - max-height: 0; -} - -.assistant-message .text-gray-700 { - color: #374151; -} - -.assistant-message h1 { - font-size: 1.5rem; - font-weight: bold; - margin-top: 1rem; - margin-bottom: 0.5rem; -} - -.assistant-message h2 { - font-size: 1.25rem; - font-weight: bold; - margin-top: 1rem; - margin-bottom: 0.5rem; - border-bottom: 1px solid #e5e7eb; - padding-bottom: 0.25rem; -} - -.assistant-message p { - margin-bottom: 1rem; - line-height: 1.5; -} - -.assistant-message ul { - list-style-type: disc; - padding-left: 1.5rem; - margin-bottom: 1rem; -} - -.assistant-message li { - margin-bottom: 0.25rem; + 80%, + 100% { + content: '...'; + } } .assistant-message strong { - font-weight: bold; + font-weight: 700; + color: #6366f1; } \ No newline at end of file diff --git a/ui/client-app/src/components/ChatWindow.js b/ui/client-app/src/components/ChatWindow.js index 77dc975..f7fc8f0 100644 --- a/ui/client-app/src/components/ChatWindow.js +++ b/ui/client-app/src/components/ChatWindow.js @@ -88,8 +88,8 @@ } } }; - const assistantMessageClasses = `p-3 pb-2 rounded-2xl shadow-sm max-w-[85%] bg-gray-200 dark:bg-gray-800 text-gray-900 dark:text-gray-100 assistant-message mr-auto border border-gray-300 dark:border-gray-700/50`; - const userMessageClasses = `max-w-[80%] p-3 pb-2 rounded-2xl shadow-sm bg-indigo-600 text-white ml-auto`; + const assistantMessageClasses = `p-4 rounded-2xl shadow-lg max-w-[85%] assistant-message mr-auto border border-gray-300 dark:border-gray-700/50 text-gray-900 dark:text-gray-100`; + const userMessageClasses = `max-w-[80%] p-4 rounded-2xl shadow-md text-white ml-auto user-message-container`; const formatTime = (iso) => { if (!iso) return ''; @@ -100,23 +100,39 @@ return (
+ {message.status && ( +
+
+
+ + {message.status} + +
+
+ )} {message.reasoning && ( -
+
{message.reasoning}
)} - {message.text} + + +
+ {message.text} +
+ {(message.isPureAnswer || !message.isUser) && (
{/* Horizontal line - only for voice chat to separate from voice controls */} diff --git a/ui/client-app/src/hooks/useSwarmControl.js b/ui/client-app/src/hooks/useSwarmControl.js index 1d3cdaf..e90d9c9 100644 --- a/ui/client-app/src/hooks/useSwarmControl.js +++ b/ui/client-app/src/hooks/useSwarmControl.js @@ -35,6 +35,8 @@ isUser: msg.sender === "user", isPureAnswer: true, text: msg.content, + reasoning: msg.reasoning_content, + timestamp: msg.created_at })); setChatHistory(formattedHistory); } @@ -130,16 +132,54 @@ } setIsProcessing(true); - setChatHistory((prev) => [...prev, { isUser: true, text }]); + setChatHistory((prev) => [...prev, { isUser: true, text, timestamp: new Date().toISOString() }]); try { - const response = await chatWithAI(sessionIdRef.current, text, localActiveLLM || "gemini"); + const startTime = Date.now(); + let reasoningStartTime = null; + let reasoningDuration = 0; + + // Add a placeholder message for the AI that we will update live setChatHistory((prev) => [...prev, { isUser: false, - isPureAnswer: true, - text: response.answer, - provider: response.provider_used + text: "", + reasoning: "", + status: "Thinking", // CSS animation will add the dots + provider: localActiveLLM, + timestamp: new Date().toISOString() }]); + + await chatWithAI(sessionIdRef.current, text, localActiveLLM || "gemini", (event) => { + setChatHistory((prev) => { + const newHistory = [...prev]; + const lastMsg = { ...newHistory[newHistory.length - 1] }; + + if (event.type === "reasoning") { + if (!reasoningStartTime) reasoningStartTime = Date.now(); + lastMsg.reasoning += event.content; + lastMsg.status = "Thinking"; + } else if (event.type === "content") { + if (reasoningStartTime && !lastMsg.thoughtDone) { + reasoningDuration = Math.round((Date.now() - reasoningStartTime) / 1000); + lastMsg.status = `Thought for ${reasoningDuration}s`; + lastMsg.thoughtDone = true; + } else if (!reasoningStartTime) { + lastMsg.status = null; + } + lastMsg.text += event.content; + } else if (event.type === "status") { + lastMsg.status = event.content; + } else if (event.type === "tool_start") { + lastMsg.status = `Calling tool: ${event.name}`; + } else if (event.type === "tool_result") { + lastMsg.status = `Tool ${event.name} returned.`; + } + + newHistory[newHistory.length - 1] = lastMsg; + return newHistory; + }); + }); + fetchTokenUsage(); } catch (error) { setErrorMessage(error.message); @@ -147,6 +187,7 @@ } finally { setIsProcessing(false); } + }, [isConfigured, localActiveLLM, fetchTokenUsage]); const handleSwitchSession = useCallback(async (targetSessionId) => { diff --git a/ui/client-app/src/hooks/useVoiceChat.js b/ui/client-app/src/hooks/useVoiceChat.js index 3564e91..7c53f3d 100644 --- a/ui/client-app/src/hooks/useVoiceChat.js +++ b/ui/client-app/src/hooks/useVoiceChat.js @@ -127,7 +127,9 @@ id: msg.id, isUser: msg.sender === "user", text: msg.content, + reasoning: msg.reasoning_content, timestamp: msg.created_at, + isFromHistory: true, audioBlob: audioBlob }; diff --git a/ui/client-app/src/services/apiService.js b/ui/client-app/src/services/apiService.js index f48bce8..039b41b 100644 --- a/ui/client-app/src/services/apiService.js +++ b/ui/client-app/src/services/apiService.js @@ -303,37 +303,83 @@ }; /** - * Sends a text prompt to the LLM endpoint and gets a text response. + * Sends a text prompt to the LLM endpoint and gets a streaming text response (SSE). * @param {string} sessionId - The current chat session ID. * @param {string} prompt - The user's text prompt. - * @returns {Promise} The AI's text response. + * @param {string} providerName - AI model provider. + * @param {function} onMessage - Callback for each event chunk {type, content}. */ -export const chatWithAI = async (sessionId, prompt, providerName = "gemini") => { +export const chatWithAI = async (sessionId, prompt, providerName = "gemini", onMessage = null) => { const userId = getUserId(); const response = await fetch(SESSIONS_CHAT_ENDPOINT(sessionId), { method: "POST", headers: { "Content-Type": "application/json", "X-User-ID": userId }, body: JSON.stringify({ prompt: prompt, provider_name: providerName }), }); + if (!response.ok) { let detail = "LLM API failed"; try { const errBody = await response.json(); - if (typeof errBody.detail === "string") { - detail = errBody.detail; - } else if (Array.isArray(errBody.detail)) { - // Pydantic 422 errors: [{"loc":..., "msg":..., "type":...}] - detail = errBody.detail.map((err) => `${err.loc.join(".")}: ${err.msg}`).join("; "); - } else if (errBody.detail) { - detail = JSON.stringify(errBody.detail); - } + detail = errBody.detail || JSON.stringify(errBody); } catch { } throw new Error(detail); } - const result = await response.json(); - return result; + + // Handle Streaming Response + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let accumulatedBuffer = ""; + + // Track final result for backward compatibility + let fullAnswer = ""; + let lastMessageId = null; + let finalProvider = providerName; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + accumulatedBuffer += decoder.decode(value, { stream: true }); + + const parts = accumulatedBuffer.split("\n\n"); + accumulatedBuffer = parts.pop(); + + for (const part of parts) { + if (part.startsWith("data: ")) { + try { + const jsonStr = part.slice(6).trim(); + if (jsonStr) { + const data = JSON.parse(jsonStr); + + // Accumulate content and info + if (data.type === "content" && data.content) { + fullAnswer += data.content; + } else if (data.type === "finish") { + lastMessageId = data.message_id; + finalProvider = data.provider; + } + + // Pass to streaming callback if provided + if (onMessage) onMessage(data); + } + } catch (e) { + console.warn("Failed to parse SSE line:", part, e); + } + } + } + } + + // Return the full result as the standard API used to + return { + answer: fullAnswer, + message_id: lastMessageId, + provider_used: finalProvider + }; }; + + /** * Streams speech from the TTS endpoint and processes each chunk. * It uses a callback to pass the processed audio data back to the caller.