diff --git a/ai-hub/app/api/routes/sessions.py b/ai-hub/app/api/routes/sessions.py
index ee7276c..127477e 100644
--- a/ai-hub/app/api/routes/sessions.py
+++ b/ai-hub/app/api/routes/sessions.py
@@ -83,24 +83,38 @@
             raise HTTPException(status_code=500, detail=f"Failed to create session: {e}")
 
 
-    @router.post("/{session_id}/chat", response_model=schemas.ChatResponse, summary="Send a Message in a Session")
+    @router.post("/{session_id}/chat", summary="Send a Message in a Session (Streaming)")
     async def chat_in_session(
         session_id: int,
         request: schemas.ChatRequest,
         db: Session = Depends(get_db)
     ):
-        try:
-            response_text, provider_used, message_id = await services.rag_service.chat_with_rag(
-                db=db,
-                session_id=session_id,
-                prompt=request.prompt,
-                provider_name=request.provider_name,
-                load_faiss_retriever=request.load_faiss_retriever,
-                user_service=services.user_service
-            )
-            return schemas.ChatResponse(answer=response_text, provider_used=provider_used, message_id=message_id)
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=f"An error occurred during chat: {e}")
+        """
+        Streams AI response using Server-Sent Events (SSE).
+        Yields tokens, reasoning, and tool executions in real-time.
+        """
+        from fastapi.responses import StreamingResponse
+        import json
+
+        async def event_generator():
+            try:
+                # chat_with_rag is now an AsyncGenerator
+                async for event in services.rag_service.chat_with_rag(
+                    db=db,
+                    session_id=session_id,
+                    prompt=request.prompt,
+                    provider_name=request.provider_name,
+                    load_faiss_retriever=request.load_faiss_retriever,
+                    user_service=services.user_service
+                ):
+                    yield f"data: {json.dumps(event)}\n\n"
+            except Exception as e:
+                import logging
+                logging.exception("Error in chat streaming")
+                yield f"data: {json.dumps({'type': 'error', 'content': str(e)})}\n\n"
+
+        return StreamingResponse(event_generator(), media_type="text/event-stream")
+
 
     @router.get("/{session_id}/messages", response_model=schemas.MessageHistoryResponse, summary="Get Session Chat History")
     def get_session_messages(session_id: int, db: Session = Depends(get_db)):
diff --git a/ai-hub/app/api/schemas.py b/ai-hub/app/api/schemas.py
index 1f6bac5..3a979e5 100644
--- a/ai-hub/app/api/schemas.py
+++ b/ai-hub/app/api/schemas.py
@@ -230,8 +230,11 @@
     sender: Literal["user", "assistant"]
     # The text content of the message.
     content: str
+    # The progressive reasoning or 'thinking' step for models that support it
+    reasoning_content: Optional[str] = None
     # The timestamp for when the message was created.
     created_at: datetime
+
     # URL to the saved audio file
     audio_url: Optional[str] = None
     # Whether audio exists for this message
diff --git a/ai-hub/app/core/pipelines/rag_pipeline.py b/ai-hub/app/core/pipelines/rag_pipeline.py
index 9dcfaee..445adad 100644
--- a/ai-hub/app/core/pipelines/rag_pipeline.py
+++ b/ai-hub/app/core/pipelines/rag_pipeline.py
@@ -69,7 +69,7 @@
         user_id: Optional[str] = None,
         feature_name: str = "chat",
         prompt_slug: str = "rag-pipeline"
-    ) -> str:
+    ):
         logging.debug(f"[RagPipeline.forward] Received question: '{question}'")
 
         if not llm_provider:
@@ -95,7 +95,6 @@
         )
 
         # 1. Prepare initial messages
-        # We put the 'question' as the user message and use 'system_prompt' for instructions/context
         messages = [
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": question}
@@ -103,13 +102,12 @@
 
         # 2. Agentic Tool Loop (Max 5 turns to prevent infinite loops)
         for turn in range(5):
-            request_kwargs = {}
+            request_kwargs = {"stream": True}
             if tools:
                 request_kwargs["tools"] = tools
                 request_kwargs["tool_choice"] = "auto"
 
             model = getattr(llm_provider, "model_name", "unknown")
-            # Safely calculate total characters in messages, handling None content
             msg_lens = []
             for m in messages:
                 content = ""
@@ -122,59 +120,97 @@
             total_chars = sum(msg_lens)
             tool_count = len(tools) if tools else 0
             
-            logging.info(f"[RagPipeline] Turn {turn+1} starting. Model: {model}, Messages: {len(messages)}, Total Chars: {total_chars}, Tools: {tool_count}")
-            # Log the specific turn metadata for diagnostics
-            logging.debug(f"[RagPipeline] Turn {turn+1} Payload Metadata: {messages}")
+            logging.info(f"[RagPipeline] Turn {turn+1} starting (STREAMING). Model: {model}, Messages: {len(messages)}, Total Chars: {total_chars}")
 
+            # LiteLLM streaming call
             prediction = await llm_provider.acompletion(messages=messages, **request_kwargs)
             
-            choices = getattr(prediction, "choices", None)
-            if not choices or len(choices) == 0:
-                finish_reason = getattr(prediction, "finish_reason", "unknown")
-                if choices is not None and len(choices) == 0:
-                     # Some providers return empty list for safety filters
-                     logging.warning(f"[RagPipeline.forward] LLM ({model}) returned 0 choices. Turn: {turn+1}. Filter/Safety Trigger likely.")
+            accumulated_content = ""
+            accumulated_reasoning = ""
+            tool_calls_map = {} # index -> tc object
+            
+            async for chunk in prediction:
+                if not chunk.choices: continue
+                delta = chunk.choices[0].delta
                 
-                logging.error(f"[RagPipeline.forward] LLM ({model}) failed at Turn {turn+1}. Reason: {finish_reason}. Full Response: {prediction}")
-                return (f"The AI provider ({model}) returned an empty response (Turn {turn+1}). "
-                        f"Reason: {finish_reason}. Context: {total_chars} chars, {tool_count} tools. "
-                        "This is often a safety filter blocking the prompt.")
+                # A. Handle Reasoning (Thinking)
+                # Some models use 'reasoning_content' (OpenAI-compatible / DeepSeek)
+                reasoning = getattr(delta, "reasoning_content", None) or delta.get("reasoning_content")
+                if reasoning:
+                    accumulated_reasoning += reasoning
+                    yield {"type": "reasoning", "content": reasoning}
+                
+                # B. Handle Content
+                content = getattr(delta, "content", None) or delta.get("content")
+                if content:
+                    accumulated_content += content
+                    yield {"type": "content", "content": content}
+                
+                # C. Handle Tool Calls
+                tool_calls = getattr(delta, "tool_calls", None) or delta.get("tool_calls")
+                if tool_calls:
+                    for tc_delta in tool_calls:
+                        idx = tc_delta.index
+                        if idx not in tool_calls_map:
+                            tool_calls_map[idx] = tc_delta
+                        else:
+                            # Accumulate arguments
+                            if tc_delta.function.arguments:
+                                tool_calls_map[idx].function.arguments += tc_delta.function.arguments
 
-            message = prediction.choices[0].message
-            
-            # If no tool calls, we are done
-            if not getattr(message, "tool_calls", None):
-                raw_response = message.content or ""
-                if self.response_postprocessor:
-                    return self.response_postprocessor(raw_response)
-                return raw_response
+            # Process completed turn
+            if not tool_calls_map:
+                # If no tools, this is the final answer for this forward pass.
+                # However, we might have accumulated content/reasoning. 
+                # We stop the loop and return.
+                return
 
-            # Process tool calls
-            messages.append(message) # Add assistant message with tool_calls
+            # Reconstruct the tool call list and message object for the next turn
+            processed_tool_calls = list(tool_calls_map.values())
             
-            for tool_call in message.tool_calls:
-                func_name = tool_call.function.name
+            # Format as an OpenAI-style message for the history
+            # But await completion returns thin models, we need a serializable dict/object
+            assistant_msg = {
+                "role": "assistant",
+                "content": accumulated_content or None,
+                "tool_calls": processed_tool_calls
+            }
+            if accumulated_reasoning:
+                assistant_msg["reasoning_content"] = accumulated_reasoning
+            
+            messages.append(assistant_msg)
+            
+            for tc in processed_tool_calls:
+                func_name = tc.function.name
                 func_args = {}
                 try:
                     import json
-                    func_args = json.loads(tool_call.function.arguments)
+                    func_args = json.loads(tc.function.arguments)
                 except: pass
 
+                yield {"type": "status", "content": f"AI decided to use tool: {func_name}"}
                 logging.info(f"[🔧] Agent calling tool: {func_name} with {func_args}")
                 
                 if tool_service:
+                    # Notify UI about tool execution start
+                    yield {"type": "tool_start", "name": func_name, "args": func_args}
+                    
                     result = await tool_service.call_tool(func_name, func_args, db=db, user_id=user_id)
                 else:
                     result = {"success": False, "error": "Tool service not available"}
 
+                # Stream the result back so UI can see "behind the scenes"
+                yield {"type": "tool_result", "name": func_name, "result": result}
+                
                 messages.append({
                     "role": "tool",
-                    "tool_call_id": tool_call.id,
+                    "tool_call_id": tc.id,
                     "name": func_name,
-                    "content": str(result)
+                    "content": json.dumps(result) if isinstance(result, dict) else str(result)
                 })
 
-        return "Agent loop reached maximum turns without a final response."
+        yield {"type": "error", "content": "Agent loop reached maximum turns without a final response."}
+
 
     def _build_prompt(self, context, history, question):
         return f"""Generate a natural and context-aware answer to the user's question using the provided knowledge and conversation history.
diff --git a/ai-hub/app/core/services/rag.py b/ai-hub/app/core/services/rag.py
index 949baf2..a58d6c0 100644
--- a/ai-hub/app/core/services/rag.py
+++ b/ai-hub/app/core/services/rag.py
@@ -28,9 +28,10 @@
         load_faiss_retriever: bool = False,
         user_service = None,
         user_id: Optional[str] = None
-    ) -> Tuple[str, str, int]:
+    ):
         """
-        Processes a user prompt within a session, saves the chat history, and returns a response.
+        Processes a user prompt within a session, yields events in real-time, 
+        and saves the chat history at the end.
         """
         session = db.query(models.Session).options(
             joinedload(models.Session.messages)
@@ -49,19 +50,18 @@
         if session.title in (None, "New Chat Session", ""):
             session.title = prompt[:60].strip() + ("..." if len(prompt) > 60 else "")
 
-        # Keep provider_name in sync with the model actually being used
+        # Keep provider_name in sync
         if session.provider_name != provider_name:
             session.provider_name = provider_name
         
         db.commit()
 
-        # Resolve provider: User Prefs > System Settings
+        # Resolve provider
         llm_prefs = {}
         user = session.user
         if user and user.preferences:
             llm_prefs = user.preferences.get("llm", {}).get("providers", {}).get(provider_name, {})
 
-        # System Settings Fallback
         if (not llm_prefs or not llm_prefs.get("api_key") or "*" in str(llm_prefs.get("api_key"))) and user_service:
             system_prefs = user_service.get_system_settings(db)
             system_provider_prefs = system_prefs.get("llm", {}).get("providers", {}).get(provider_name, {})
@@ -73,7 +73,6 @@
         api_key_override = llm_prefs.get("api_key")
         model_name_override = llm_prefs.get("model", "")
             
-        # Get the appropriate LLM provider with all extra prefs passed as kwargs
         kwargs = {k: v for k, v in llm_prefs.items() if k not in ["api_key", "model"]}
         llm_provider = get_llm_provider(
             provider_name, 
@@ -82,23 +81,17 @@
             **kwargs
         )
         
-        # Configure retrievers for the pipeline
         context_chunks = []
         if load_faiss_retriever:
             if self.faiss_retriever:
-               context_chunks.extend(self.faiss_retriever.retrieve_context(query=prompt, db=db))  # Ensure FAISS index is loaded
-            else:
-                print("Warning: FaissDBRetriever requested but not available. Proceeding without it.")
+               context_chunks.extend(self.faiss_retriever.retrieve_context(query=prompt, db=db))
         
         rag_pipeline = RagPipeline()
         
-        # Discover available tools (Skills) for the current user
-        # Filter by session.feature_name (e.g. 'chat' or 'voice')
         tools = []
         if self.tool_service:
             tools = self.tool_service.get_available_tools(db, session.user_id, feature=session.feature_name)
             
-        # Gather information about attached nodes for the system prompt
         mesh_context = ""
         if session.attached_node_ids:
             nodes = db.query(models.AgentNode).filter(models.AgentNode.node_id.in_(session.attached_node_ids)).all()
@@ -110,7 +103,6 @@
                     mesh_context += f"  Description: {node.description or 'No description provided.'}\n"
                     mesh_context += f"  Status: {node.last_status}\n"
                     
-                    # M6: Sandbox Configuration - Expose permissions to AI
                     shell_config = (node.skill_config or {}).get("shell", {})
                     if shell_config.get("enabled"):
                         sandbox = shell_config.get("sandbox", {})
@@ -124,33 +116,31 @@
                         elif mode == "PERMISSIVE":
                             mesh_context += f"  AI Restricted Commands (Blacklist): {', '.join(denied) if denied else 'None'}\n"
                         
-                        # AI advice
                         if mode == "STRICT" and not allowed:
                              mesh_context += "  ⚠️ Warning: All shell commands are currently blocked by sandbox policy.\n"
 
-                    # AI Visibility: Include recent terminal history
-                    if user_service and hasattr(user_service, "node_registry_service") or True: # Use registry if available
-                        # We try to get the registry from the user_service or a global if needed
-                        # But simpler: we passed it in (optional arg)
-                        registry = getattr(self, "node_registry_service", None)
-                        if not registry and user_service:
-                            registry = getattr(user_service, "node_registry_service", None)
-                        
-                        if registry:
-                            live = registry.get_node(node.node_id)
-                            if live and live.terminal_history:
-                                # Show last 40 lines (approx) to avoid context bloat
-                                history = live.terminal_history[-40:]
-                                mesh_context += "  Recent Terminal Output:\n"
-                                mesh_context += "  ```\n"
-                                for line in history:
-                                    mesh_context += f"  {line}"
-                                if not history[-1].endswith('\n'): mesh_context += "\n"
-                                mesh_context += "  ```\n"
-
+                    # AI Visibility: Recent terminal history
+                    registry = getattr(self, "node_registry_service", None)
+                    if not registry and user_service:
+                        registry = getattr(user_service, "node_registry_service", None)
+                    
+                    if registry:
+                        live = registry.get_node(node.node_id)
+                        if live and live.terminal_history:
+                            history = live.terminal_history[-40:]
+                            mesh_context += "  Recent Terminal Output:\n"
+                            mesh_context += "  ```\n"
+                            for line in history: mesh_context += f"  {line}"
+                            if not history[-1].endswith('\n'): mesh_context += "\n"
+                            mesh_context += "  ```\n"
                 mesh_context += "\n"
 
-        answer_text = await rag_pipeline.forward(
+        # Accumulators for the DB save at the end
+        full_answer = ""
+        full_reasoning = ""
+        
+        # Stream from pipeline
+        async for event in rag_pipeline.forward(
             question=prompt, 
             history=session.messages,
             context_chunks = context_chunks,
@@ -163,15 +153,38 @@
             user_id = user_id or session.user_id,
             feature_name = session.feature_name,
             prompt_slug = "rag-pipeline"
+        ):
+            if event["type"] == "content":
+                full_answer += event["content"]
+            elif event["type"] == "reasoning":
+                full_reasoning += event["content"]
+            
+            # Forward the event to the API stream
+            yield event
+
+        # Save assistant's response to DB
+        assistant_message = models.Message(
+            session_id=session_id, 
+            sender="assistant", 
+            content=full_answer,
+            # We assume your models.Message might have these or we just save content
         )
-        
-        # Save assistant's response
-        assistant_message = models.Message(session_id=session_id, sender="assistant", content=answer_text)
+        # Optional: if model supports reasoning_content field
+        if full_reasoning and hasattr(assistant_message, "reasoning_content"):
+            assistant_message.reasoning_content = full_reasoning
+            
         db.add(assistant_message)
         db.commit()
         db.refresh(assistant_message)
 
-        return answer_text, provider_name, assistant_message.id
+        # Yield a final finish event with metadata
+        yield {
+            "type": "finish", 
+            "message_id": assistant_message.id, 
+            "provider": provider_name,
+            "full_answer": full_answer
+        }
+
 
     def get_message_history(self, db: Session, session_id: int) -> List[models.Message]:
         """
diff --git a/ai-hub/app/db/migrate.py b/ai-hub/app/db/migrate.py
index 562b0ea..07cef65 100644
--- a/ai-hub/app/db/migrate.py
+++ b/ai-hub/app/db/migrate.py
@@ -24,8 +24,10 @@
         required_columns = [
             ("audio_path", "TEXT"),
             ("model_response_time", "INTEGER"),
-            ("token_count", "INTEGER")
+            ("token_count", "INTEGER"),
+            ("reasoning_content", "TEXT")
         ]
+
         
         for col_name, col_type in required_columns:
             if col_name not in columns:
diff --git a/ai-hub/app/db/models.py b/ai-hub/app/db/models.py
index d565feb..b780b58 100644
--- a/ai-hub/app/db/models.py
+++ b/ai-hub/app/db/models.py
@@ -141,10 +141,13 @@
     # A JSON field to store unstructured metadata about the message, such as tool calls.
     # This column has been renamed from 'metadata' to avoid a conflict.
     message_metadata = Column(JSON, nullable=True)
+    # The progressive reasoning or 'thinking' step for models that support it (e.g. DeepSeek R1)
+    reasoning_content = Column(Text, nullable=True)
     # Path to the generated audio file for this message, if any.
     audio_path = Column(String, nullable=True)
 
 
+
     # Relationship back to the parent Session.
     # This allows us to access the parent Session object from a Message object.
     session = relationship("Session", back_populates="messages")
diff --git a/ui/client-app/src/components/ChatWindow.css b/ui/client-app/src/components/ChatWindow.css
index b780266..8b923b7 100644
--- a/ui/client-app/src/components/ChatWindow.css
+++ b/ui/client-app/src/components/ChatWindow.css
@@ -1,139 +1,105 @@
-/* Scoped to .assistant-message */
+/* Modern AI Tool Styles */
+:root {
+    --user-bubble-bg: linear-gradient(135deg, #6366f1 0%, #4338ca 100%);
+    --assistant-bubble-bg: rgba(255, 255, 255, 0.05);
+    --reasoning-bg: rgba(99, 102, 241, 0.03);
+    --border-subtle: rgba(255, 255, 255, 0.1);
+}
 
+.dark {
+    --assistant-bubble-bg: rgba(31, 41, 55, 0.5);
+    --border-subtle: rgba(255, 255, 255, 0.08);
+}
+
+.assistant-message {
+    background: var(--assistant-bubble-bg) !important;
+    backdrop-filter: blur(8px);
+    border: 1px solid var(--border-subtle) !important;
+    border-radius: 1.25rem !important;
+    font-family: 'Inter', sans-serif;
+    animation: slideInUp 0.3s ease-out;
+}
+
+.user-message-container {
+    background: var(--user-bubble-bg) !important;
+    border-radius: 1.25rem !important;
+    font-family: 'Inter', sans-serif;
+    animation: slideInUp 0.3s ease-out;
+}
+
+@keyframes slideInUp {
+    from {
+        opacity: 0;
+        transform: translateY(10px) scale(0.98);
+    }
+
+    to {
+        opacity: 1;
+        transform: translateY(0) scale(1);
+    }
+}
+
+.thought-panel {
+    background: var(--reasoning-bg);
+    border-left: 3px solid #6366f1;
+    border-radius: 0 0.75rem 0.75rem 0;
+    margin: 0.5rem 0;
+}
+
+.status-chip {
+    padding: 0.25rem 0.75rem;
+    border-radius: 9999px;
+    background: rgba(99, 102, 241, 0.1);
+    border: 1px solid rgba(99, 102, 241, 0.2);
+    display: inline-flex;
+    align-items: center;
+    gap: 0.5rem;
+}
+
+/* Scoped to .assistant-message */
 .assistant-message code {
-    color: #333;
-    font-size: 100%;
-    font-weight: bold;
-    background-color: #f4f4f4;
-    border-radius: 0.25rem;
-    padding: 0.125rem 0.25rem;
-    font-family: monospace;
+    color: #818cf8;
+    font-size: 90%;
+    background-color: rgba(99, 102, 241, 0.1);
+    border-radius: 4px;
+    padding: 2px 4px;
 }
 
 .assistant-message pre {
-    font-family: 'Courier New', monospace;
-    background-color: #f4f4f4;
-    padding: 1em;
-    border-radius: 5px;
-    border: 1px solid #ddd;
-    overflow-x: auto;
-    white-space: pre-wrap;
-    word-wrap: break-word;
-    line-height: 1.5;
-    color: #333;
+    background-color: #0f172a;
+    border: 1px solid rgba(255, 255, 255, 0.1);
+    color: #e2e8f0;
+    padding: 1.25rem;
+    border-radius: 0.75rem;
 }
 
-.assistant-message .p-4 {
-    padding: 1rem;
+.thinking-dots::after {
+    content: '';
+    animation: dots 1.5s steps(5, end) infinite;
 }
 
-.assistant-message .rounded-lg {
-    border-radius: 0.5rem;
-}
+@keyframes dots {
 
-.assistant-message .shadow-md {
-    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
-}
+    0%,
+    20% {
+        content: '';
+    }
 
-.assistant-message .w-full {
-    width: 100%;
-}
+    40% {
+        content: '.';
+    }
 
-.assistant-message .bg-gray-200 {
-    background-color: #e5e7eb;
-}
+    60% {
+        content: '..';
+    }
 
-.assistant-message .text-gray-900 {
-    color: #111827;
-}
-
-.assistant-message .mb-2 {
-    margin-bottom: 0.5rem;
-}
-
-.assistant-message .text-sm {
-    font-size: 0.875rem;
-}
-
-.assistant-message .font-semibold {
-    font-weight: 600;
-}
-
-.assistant-message .text-indigo-700 {
-    color: #4338ca;
-}
-
-.assistant-message .hover\:text-indigo-800:hover {
-    color: #3730a3;
-}
-
-.assistant-message .focus\:outline-none:focus {
-    outline: 0;
-}
-
-.assistant-message .mt-2 {
-    margin-top: 0.5rem;
-}
-
-.assistant-message .text-xs {
-    font-size: 0.75rem;
-}
-
-.assistant-message .transition-max-h {
-    transition-property: max-height;
-}
-
-.assistant-message .duration-500 {
-    transition-duration: 500ms;
-}
-
-.assistant-message .ease-in-out {
-    transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
-}
-
-.assistant-message .overflow-hidden {
-    overflow: hidden;
-}
-
-.assistant-message .max-h-0 {
-    max-height: 0;
-}
-
-.assistant-message .text-gray-700 {
-    color: #374151;
-}
-
-.assistant-message h1 {
-    font-size: 1.5rem;
-    font-weight: bold;
-    margin-top: 1rem;
-    margin-bottom: 0.5rem;
-}
-
-.assistant-message h2 {
-    font-size: 1.25rem;
-    font-weight: bold;
-    margin-top: 1rem;
-    margin-bottom: 0.5rem;
-    border-bottom: 1px solid #e5e7eb;
-    padding-bottom: 0.25rem;
-}
-
-.assistant-message p {
-    margin-bottom: 1rem;
-    line-height: 1.5;
-}
-
-.assistant-message ul {
-    list-style-type: disc;
-    padding-left: 1.5rem;
-    margin-bottom: 1rem;
-}
-
-.assistant-message li {
-    margin-bottom: 0.25rem;
+    80%,
+    100% {
+        content: '...';
+    }
 }
 
 .assistant-message strong {
-    font-weight: bold;
+    font-weight: 700;
+    color: #6366f1;
 }
\ No newline at end of file
diff --git a/ui/client-app/src/components/ChatWindow.js b/ui/client-app/src/components/ChatWindow.js
index 77dc975..f7fc8f0 100644
--- a/ui/client-app/src/components/ChatWindow.js
+++ b/ui/client-app/src/components/ChatWindow.js
@@ -88,8 +88,8 @@
       }
     }
   };
-  const assistantMessageClasses = `p-3 pb-2 rounded-2xl shadow-sm max-w-[85%] bg-gray-200 dark:bg-gray-800 text-gray-900 dark:text-gray-100 assistant-message mr-auto border border-gray-300 dark:border-gray-700/50`;
-  const userMessageClasses = `max-w-[80%] p-3 pb-2 rounded-2xl shadow-sm bg-indigo-600 text-white ml-auto`;
+  const assistantMessageClasses = `p-4 rounded-2xl shadow-lg max-w-[85%] assistant-message mr-auto border border-gray-300 dark:border-gray-700/50 text-gray-900 dark:text-gray-100`;
+  const userMessageClasses = `max-w-[80%] p-4 rounded-2xl shadow-md text-white ml-auto user-message-container`;
 
   const formatTime = (iso) => {
     if (!iso) return '';
@@ -100,23 +100,39 @@
 
   return (
     <div className={message.isUser ? userMessageClasses : assistantMessageClasses}>
+      {message.status && (
+        <div className="mb-3 transition-all duration-300">
+          <div className="status-chip text-[10px] sm:text-xs text-indigo-600 dark:text-indigo-300 font-semibold italic">
+            <div className="w-2 h-2 bg-indigo-500 rounded-full animate-pulse"></div>
+            <span className={message.status === "Thinking" ? "thinking-dots" : ""}>
+              {message.status}
+            </span>
+          </div>
+        </div>
+      )}
       {message.reasoning && (
-        <div className="mb-2">
+        <div className="mb-3">
           <button
             onClick={toggleReasoning}
-            className="text-sm font-semibold text-indigo-700 hover:text-indigo-800 dark:text-gray-300 dark:hover:text-gray-100 focus:outline-none"
+            className="text-xs font-bold text-indigo-600 dark:text-indigo-400 hover:opacity-80 focus:outline-none flex items-center gap-2 px-2 py-1 rounded-md hover:bg-indigo-50 dark:hover:bg-indigo-900/20 transition-colors"
           >
-            {isReasoningExpanded ? "Hide Reasoning ▲" : "Show Reasoning ▼"}
+            <span className="text-[10px] transform transition-transform duration-200" style={{ transform: isReasoningExpanded ? 'rotate(90deg)' : 'rotate(0deg)' }}>▶</span>
+            {message.status && message.status.startsWith("Thought for") ? message.status.toUpperCase() : "THOUGHT TRACE"}
           </button>
           <div
-            className={`mt-2 text-xs transition-max-h duration-500 ease-in-out overflow-hidden ${isReasoningExpanded ? "max-h-96" : "max-h-0"
-              } text-gray-700 dark:text-gray-300`}
+            className={`mt-2 text-xs transition-all duration-500 ease-in-out overflow-hidden thought-panel pl-4 pr-2 py-1 ${isReasoningExpanded ? "max-h-[800px] opacity-100 mb-2" : "max-h-0 opacity-0"
+              } text-gray-600 dark:text-gray-400 font-light leading-relaxed`}
           >
             <ReactMarkdown>{message.reasoning}</ReactMarkdown>
           </div>
         </div>
       )}
-      <ReactMarkdown>{message.text}</ReactMarkdown>
+
+
+      <div className="prose dark:prose-invert max-w-none text-sm leading-relaxed">
+        <ReactMarkdown>{message.text}</ReactMarkdown>
+      </div>
+
       {(message.isPureAnswer || !message.isUser) && (
         <div className="justify-end items-center mt-2 group-audio-container">
           {/* Horizontal line - only for voice chat to separate from voice controls */}
diff --git a/ui/client-app/src/hooks/useSwarmControl.js b/ui/client-app/src/hooks/useSwarmControl.js
index 1d3cdaf..e90d9c9 100644
--- a/ui/client-app/src/hooks/useSwarmControl.js
+++ b/ui/client-app/src/hooks/useSwarmControl.js
@@ -35,6 +35,8 @@
           isUser: msg.sender === "user",
           isPureAnswer: true,
           text: msg.content,
+          reasoning: msg.reasoning_content,
+          timestamp: msg.created_at
         }));
         setChatHistory(formattedHistory);
       }
@@ -130,16 +132,54 @@
     }
 
     setIsProcessing(true);
-    setChatHistory((prev) => [...prev, { isUser: true, text }]);
+    setChatHistory((prev) => [...prev, { isUser: true, text, timestamp: new Date().toISOString() }]);
 
     try {
-      const response = await chatWithAI(sessionIdRef.current, text, localActiveLLM || "gemini");
+      const startTime = Date.now();
+      let reasoningStartTime = null;
+      let reasoningDuration = 0;
+
+      // Add a placeholder message for the AI that we will update live
       setChatHistory((prev) => [...prev, {
         isUser: false,
-        isPureAnswer: true,
-        text: response.answer,
-        provider: response.provider_used
+        text: "",
+        reasoning: "",
+        status: "Thinking", // CSS animation will add the dots
+        provider: localActiveLLM,
+        timestamp: new Date().toISOString()
       }]);
+
+      await chatWithAI(sessionIdRef.current, text, localActiveLLM || "gemini", (event) => {
+        setChatHistory((prev) => {
+          const newHistory = [...prev];
+          const lastMsg = { ...newHistory[newHistory.length - 1] };
+
+          if (event.type === "reasoning") {
+            if (!reasoningStartTime) reasoningStartTime = Date.now();
+            lastMsg.reasoning += event.content;
+            lastMsg.status = "Thinking";
+          } else if (event.type === "content") {
+            if (reasoningStartTime && !lastMsg.thoughtDone) {
+              reasoningDuration = Math.round((Date.now() - reasoningStartTime) / 1000);
+              lastMsg.status = `Thought for ${reasoningDuration}s`;
+              lastMsg.thoughtDone = true;
+            } else if (!reasoningStartTime) {
+              lastMsg.status = null;
+            }
+            lastMsg.text += event.content;
+          } else if (event.type === "status") {
+            lastMsg.status = event.content;
+          } else if (event.type === "tool_start") {
+            lastMsg.status = `Calling tool: ${event.name}`;
+          } else if (event.type === "tool_result") {
+            lastMsg.status = `Tool ${event.name} returned.`;
+          }
+
+          newHistory[newHistory.length - 1] = lastMsg;
+          return newHistory;
+        });
+      });
+
       fetchTokenUsage();
     } catch (error) {
       setErrorMessage(error.message);
@@ -147,6 +187,7 @@
     } finally {
       setIsProcessing(false);
     }
+
   }, [isConfigured, localActiveLLM, fetchTokenUsage]);
 
   const handleSwitchSession = useCallback(async (targetSessionId) => {
diff --git a/ui/client-app/src/hooks/useVoiceChat.js b/ui/client-app/src/hooks/useVoiceChat.js
index 3564e91..7c53f3d 100644
--- a/ui/client-app/src/hooks/useVoiceChat.js
+++ b/ui/client-app/src/hooks/useVoiceChat.js
@@ -127,7 +127,9 @@
                 id: msg.id,
                 isUser: msg.sender === "user",
                 text: msg.content,
+                reasoning: msg.reasoning_content,
                 timestamp: msg.created_at,
+
                 isFromHistory: true,
                 audioBlob: audioBlob
               };
diff --git a/ui/client-app/src/services/apiService.js b/ui/client-app/src/services/apiService.js
index f48bce8..039b41b 100644
--- a/ui/client-app/src/services/apiService.js
+++ b/ui/client-app/src/services/apiService.js
@@ -303,37 +303,83 @@
 };
 
 /**
- * Sends a text prompt to the LLM endpoint and gets a text response.
+ * Sends a text prompt to the LLM endpoint and gets a streaming text response (SSE).
  * @param {string} sessionId - The current chat session ID.
  * @param {string} prompt - The user's text prompt.
- * @returns {Promise<string>} The AI's text response.
+ * @param {string} providerName - AI model provider.
+ * @param {function} onMessage - Callback for each event chunk {type, content}.
  */
-export const chatWithAI = async (sessionId, prompt, providerName = "gemini") => {
+export const chatWithAI = async (sessionId, prompt, providerName = "gemini", onMessage = null) => {
   const userId = getUserId();
   const response = await fetch(SESSIONS_CHAT_ENDPOINT(sessionId), {
     method: "POST",
     headers: { "Content-Type": "application/json", "X-User-ID": userId },
     body: JSON.stringify({ prompt: prompt, provider_name: providerName }),
   });
+
   if (!response.ok) {
     let detail = "LLM API failed";
     try {
       const errBody = await response.json();
-      if (typeof errBody.detail === "string") {
-        detail = errBody.detail;
-      } else if (Array.isArray(errBody.detail)) {
-        // Pydantic 422 errors: [{"loc":..., "msg":..., "type":...}]
-        detail = errBody.detail.map((err) => `${err.loc.join(".")}: ${err.msg}`).join("; ");
-      } else if (errBody.detail) {
-        detail = JSON.stringify(errBody.detail);
-      }
+      detail = errBody.detail || JSON.stringify(errBody);
     } catch { }
     throw new Error(detail);
   }
-  const result = await response.json();
-  return result;
+
+  // Handle Streaming Response
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let accumulatedBuffer = "";
+
+  // Track final result for backward compatibility
+  let fullAnswer = "";
+  let lastMessageId = null;
+  let finalProvider = providerName;
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    accumulatedBuffer += decoder.decode(value, { stream: true });
+
+    const parts = accumulatedBuffer.split("\n\n");
+    accumulatedBuffer = parts.pop();
+
+    for (const part of parts) {
+      if (part.startsWith("data: ")) {
+        try {
+          const jsonStr = part.slice(6).trim();
+          if (jsonStr) {
+            const data = JSON.parse(jsonStr);
+
+            // Accumulate content and info
+            if (data.type === "content" && data.content) {
+              fullAnswer += data.content;
+            } else if (data.type === "finish") {
+              lastMessageId = data.message_id;
+              finalProvider = data.provider;
+            }
+
+            // Pass to streaming callback if provided
+            if (onMessage) onMessage(data);
+          }
+        } catch (e) {
+          console.warn("Failed to parse SSE line:", part, e);
+        }
+      }
+    }
+  }
+
+  // Return the full result as the standard API used to
+  return {
+    answer: fullAnswer,
+    message_id: lastMessageId,
+    provider_used: finalProvider
+  };
 };
 
+
+
 /**
  * Streams speech from the TTS endpoint and processes each chunk.
  * It uses a callback to pass the processed audio data back to the caller.