diff --git a/ai-hub/app/core/services/rag.py b/ai-hub/app/core/services/rag.py
index 26c78c5..2dbd95a 100644
--- a/ai-hub/app/core/services/rag.py
+++ b/ai-hub/app/core/services/rag.py
@@ -186,6 +186,8 @@
     tool_counts = {}
     input_tokens = 0
     output_tokens = 0
+    current_assistant_msg = None
+    chunk_count = 0  # streamed content/reasoning chunks seen; drives periodic commits
 
     # Stream from specialized Architect
     async for event in architect.run(
@@ -228,24 +230,56 @@
             if hasattr(usage, "get"):
                 input_tokens += usage.get("prompt_tokens", 0)
                 output_tokens += usage.get("completion_tokens", 0)
 
         # Forward the event to the API stream
         yield event
 
-    # Save assistant's response to DB
-    assistant_message = models.Message(
-        session_id=session_id,
-        sender="assistant",
-        content=full_answer,
-        # We assume your models.Message might have these or we just save content
-    )
-    # Optional: if model supports reasoning_content field
-    if full_reasoning and hasattr(assistant_message, "reasoning_content"):
-        assistant_message.reasoning_content = full_reasoning
-
-    db.add(assistant_message)
-    db.commit()
-    db.refresh(assistant_message)
+        # Handle background updates for persistent UI observability
+        if event["type"] in ("content", "reasoning"):
+            chunk_count += 1
+            # Initialize assistant message in DB on first token if it doesn't exist
+            if not current_assistant_msg:
+                current_assistant_msg = models.Message(
+                    session_id=session_id,
+                    sender="assistant",
+                    content=""
+                )
+                db.add(current_assistant_msg)
+                db.commit()
+
+            # Update local accumulators
+            if event["type"] == "content":
+                current_assistant_msg.content += event["content"]
+            elif event["type"] == "reasoning" and hasattr(current_assistant_msg, "reasoning_content"):
+                if not current_assistant_msg.reasoning_content:
+                    current_assistant_msg.reasoning_content = ""
+                current_assistant_msg.reasoning_content += event["content"]
+
+            # Commit every 5 chunks to provide smooth UI streaming without hammering the DB
+            if chunk_count % 5 == 0:
+                try:
+                    db.commit()
+                except Exception:
+                    # Best-effort flush: a failed periodic commit must not abort the stream
+                    db.rollback()
+
+    # Final cleanup of the transient assistant message state
+    if current_assistant_msg:
+        current_assistant_msg.content = full_answer
+        if full_reasoning and hasattr(current_assistant_msg, "reasoning_content"):
+            current_assistant_msg.reasoning_content = full_reasoning
+        db.commit()
+    else:
+        # Fallback if no tokens were yielded but we reached the end
+        assistant_message = models.Message(
+            session_id=session_id,
+            sender="assistant",
+            content=full_answer
+        )
+        if full_reasoning and hasattr(assistant_message, "reasoning_content"):
+            assistant_message.reasoning_content = full_reasoning
+        db.add(assistant_message)
+        db.commit()
 
     # Yield a final finish event with metadata
     yield {