diff --git a/ai-hub/app/core/services/rag.py b/ai-hub/app/core/services/rag.py
index 26c78c5..2dbd95a 100644
--- a/ai-hub/app/core/services/rag.py
+++ b/ai-hub/app/core/services/rag.py
@@ -186,6 +186,8 @@
     tool_counts = {}
     input_tokens = 0
     output_tokens = 0
+    current_assistant_msg = None
+    chunk_count = 0  # streamed content/reasoning chunks seen; drives periodic commits
 
     # Stream from specialized Architect
     async for event in architect.run(
@@ -228,24 +230,56 @@
             if hasattr(usage, "get"):
                 input_tokens += usage.get("prompt_tokens", 0)
                 output_tokens += usage.get("completion_tokens", 0)
 
         # Forward the event to the API stream
         yield event
 
-    # Save assistant's response to DB
-    assistant_message = models.Message(
-        session_id=session_id,
-        sender="assistant",
-        content=full_answer,
-        # We assume your models.Message might have these or we just save content
-    )
-    # Optional: if model supports reasoning_content field
-    if full_reasoning and hasattr(assistant_message, "reasoning_content"):
-        assistant_message.reasoning_content = full_reasoning
-
-    db.add(assistant_message)
-    db.commit()
-    db.refresh(assistant_message)
+        # Handle background updates for persistent UI observability
+        if event["type"] in ("content", "reasoning"):
+            chunk_count += 1
+            # Initialize assistant message in DB on first token if it doesn't exist
+            if not current_assistant_msg:
+                current_assistant_msg = models.Message(
+                    session_id=session_id,
+                    sender="assistant",
+                    content=""
+                )
+                db.add(current_assistant_msg)
+                db.commit()
+
+            # Update local accumulators
+            if event["type"] == "content":
+                current_assistant_msg.content += event["content"]
+            elif event["type"] == "reasoning" and hasattr(current_assistant_msg, "reasoning_content"):
+                if not current_assistant_msg.reasoning_content:
+                    current_assistant_msg.reasoning_content = ""
+                current_assistant_msg.reasoning_content += event["content"]
+
+            # Commit every 5 chunks to provide smooth UI streaming without hammering the DB
+            if chunk_count % 5 == 0:
+                try:
+                    db.commit()
+                except Exception:
+                    # Best-effort flush: a failed periodic commit must not abort the stream
+                    db.rollback()
+
+    # Final cleanup of the transient assistant message state
+    if current_assistant_msg:
+        current_assistant_msg.content = full_answer
+        if full_reasoning and hasattr(current_assistant_msg, "reasoning_content"):
+            current_assistant_msg.reasoning_content = full_reasoning
+        db.commit()
+    else:
+        # Fallback if no tokens were yielded but we reached the end
+        assistant_message = models.Message(
+            session_id=session_id,
+            sender="assistant",
+            content=full_answer
+        )
+        if full_reasoning and hasattr(assistant_message, "reasoning_content"):
+            assistant_message.reasoning_content = full_reasoning
+        db.add(assistant_message)
+        db.commit()
 
     # Yield a final finish event with metadata
     yield {