diff --git a/ai-hub/app/core/pipelines/file_selector.py b/ai-hub/app/core/pipelines/file_selector.py index 3226ed5..6575162 100644 --- a/ai-hub/app/core/pipelines/file_selector.py +++ b/ai-hub/app/core/pipelines/file_selector.py @@ -1,22 +1,30 @@ import dspy import json +from app.db import models from typing import List, Dict, Any class SelectFiles(dspy.Signature): """ - You are an expert file navigator for a large codebase. Your task is to select the most critical and relevant files to answer a user's question from a provided list of file paths. + You're an **expert file navigator** for a large codebase. Your task is to select the most critical and relevant file paths to answer a user's question. Your decision should be based on the **user's current question** and the **ongoing chat history**. All file paths you select must exist within the provided `retrieved_files` list. - Your selection criteria are: - 1. **Prioritize Core Files:** Choose files that are most likely to contain the central logic, definitions, or configuration directly related to the user's question. - 2. **Be Selective:** To avoid exceeding token limits, you must choose a small, focused set of files (e.g., typically 2-4 files). Do not select a large number of files. - 3. **Ignore Irrelevant Files:** Discard any files that appear to be placeholders, test files, or have names that are clearly unrelated to the user's query. - 4. **Infer User Intent:** If the user's question explicitly mentions a file path that is not present in the `retrieved_files` list (e.g., due to a typo or a partial path), use that as a strong hint. Analyze the `retrieved_files` list and select the path that is most similar to the one the user provided. You MUST still only return a file path that actually exists in the `retrieved_files` list, if you think none is related, return an empty array. + --- - The ONLY output you should provide is a parsable JSON array of strings. 
Do not include any other text, explanations, or markdown formatting (e.g., ```json...```). Your response must begin with `[` and end with `]`. Absolutely no other characters are allowed before or after the JSON array. The strings in the array MUST be enclosed in double quotes. + ### File Selection Criteria + + 1. **Prioritize Core Files:** Identify and select files that contain the central logic, definitions, or essential configurations directly related to the user's query and the chat history context. + 2. **Be Selective:** To avoid exceeding token limits, your response must be a small, highly focused set of files. Aim for **2 to 4 files**. Do not select a large number of files. + 3. **Exclude Irrelevant Files:** Discard files that are placeholders or have names unrelated to the user's request. Based on your knowledge, ignore compiled file types like `.pyc`, `.class`, `.o`, and `.exe`, as they are not core or text files. + 4. **Infer User Intent:** If the user or chat history mentions a file path that isn't in the `retrieved_files` list, use that as a strong hint. Find and select the path from the list that is most similar to the one mentioned. You **must** only return a file path that exists in the `retrieved_files` list. If you determine no files are related, return an empty array. + 5. **Completeness Check:** If the `retrieved_files` list already contains all the information you need to answer the question, it is acceptable to return an empty array. + + --- + + ### Output Format + + Your **ONLY** output is a parsable JSON array of strings. Do not include any other text, explanations, or markdown. Your response must begin with `[` and end with `]`. Absolutely no other characters are allowed before or after the JSON array. The strings in the array **must** be enclosed in double quotes. 
""" question = dspy.InputField(desc="The user's current question.") - retrieved_files = dspy.InputField(desc="A JSON string representing a list of all available file paths.") - answer = dspy.OutputField(format=list, desc="A JSON array of strings. Each string element in the array MUST be enclosed in double quotes.") + chat_history = dspy.InputField(desc="The ongoing dialogue between the user and the AI.") question = dspy.InputField(desc="The user's current question.") retrieved_files = dspy.InputField(desc="A JSON string representing a list of all available file paths.") answer = dspy.OutputField(format=list, desc="A JSON array of strings. Each string element in the array MUST be enclosed in double quotes.") @@ -29,14 +37,20 @@ super().__init__() # Assign the system prompt directly to the dspy.Predict instance. self.select_files = dspy.ChainOfThought(SelectFiles) - - async def forward(self, question: str, retrieved_data: List[str]) -> List[str]: + def _default_history_formatter(self, history: List[models.Message]) -> str: + return "\n".join( + f"{'Human' if msg.sender == 'user' else 'Assistant'}: {msg.content}" + for msg in history + ) + + async def forward(self, question: str, retrieved_data: List[str], history: List[models.Message]) -> List[str]: # Convert the list of strings to a JSON string using json.dumps # The prompt is now explicitly asking for a JSON array of strings, so you can pass the raw JSON string. 
retrieved_json = json.dumps(retrieved_data) # Call the predictor with the necessary inputs prediction = await self.select_files.acall( question=question, + chat_history=self._default_history_formatter(history), retrieved_files=retrieved_json ) diff --git a/ai-hub/app/core/providers/llm/general.py b/ai-hub/app/core/providers/llm/general.py index d1197de..1cc9d31 100644 --- a/ai-hub/app/core/providers/llm/general.py +++ b/ai-hub/app/core/providers/llm/general.py @@ -7,7 +7,7 @@ self.api_key = api_key self.system_prompt = system_prompt # Call the parent constructor - super().__init__(model=model_name, max_tokens=100000, **kwargs) + super().__init__(model=model_name, max_tokens=10000000, **kwargs) def _prepare_messages(self, prompt=None, messages=None): """Helper to prepare the messages list, including the system prompt.""" diff --git a/ai-hub/app/core/services/workspace.py b/ai-hub/app/core/services/workspace.py index fdfc4f3..6915bce 100644 --- a/ai-hub/app/core/services/workspace.py +++ b/ai-hub/app/core/services/workspace.py @@ -508,14 +508,19 @@ return await self._store_retrieved_files(request_id=uuid.UUID(request_id), files=files) + session = self.db.query(models.Session).options( + joinedload(models.Session.messages) + ).filter(models.Session.id == file_request.session_id).first() + provider_name = data.get("provider_name", "gemini") llm_provider = get_llm_provider(provider_name) cfs = CodeRagFileSelector() - + retrieved_data = await self._retrieve_by_request_id(self.db, request_id=request_id) with dspy.context(lm=llm_provider): raw_answer_text = await cfs( question=file_request.question, - retrieved_data = await self.get_files_by_request_id(self.db, request_id=request_id) + retrieved_data=retrieved_data, + history=session.messages if session else [] ) try: # Use ast.literal_eval for a safe and reliable parse
})) - await self.handle_files_content_response(websocket, {"files": [], "request_id": request_id}) + await self.handle_files_content_response(websocket, {"files": [], "request_id": request_id, "session_id": file_request.session_id}) return await websocket.send_text(json.dumps({ @@ -662,7 +667,8 @@ })) await websocket.send_text(json.dumps({ "type": "chat_message", - "content": raw_answer_text + "content": raw_answer_text, + "reasoning": reasoning })) async def handle_command_output(self, websocket: WebSocket, data: Dict[str, Any]): diff --git a/ui/client-app/src/components/ChatWindow.css b/ui/client-app/src/components/ChatWindow.css index f7c99c7..a573ef5 100644 --- a/ui/client-app/src/components/ChatWindow.css +++ b/ui/client-app/src/components/ChatWindow.css @@ -6,4 +6,17 @@ border-radius: 4px; font-size: 90%; font-weight: bold; + } + + pre { + font-family: 'Courier New', monospace; + background-color: #f4f4f4; + padding: 1em; + border-radius: 5px; + border: 1px solid #ddd; + overflow-x: auto; + white-space: pre-wrap; /* This helps with wrapping long lines */ + word-wrap: break-word; /* Prevents overflow */ + line-height: 1.5; + color: #333; } \ No newline at end of file diff --git a/ui/client-app/src/hooks/useCodeAssistant.js b/ui/client-app/src/hooks/useCodeAssistant.js index 2b1db06..17c067c 100644 @@ -120,7 +120,7 @@ currentHandle = await currentHandle.getDirectoryHandle(part); } } catch (error) { - console.error(`Error navigating to path part '${part}':`, error); + console.warn(`Path not found: ${filePath}`, error); return null; } } @@ -148,6 +148,10 @@ for (const filepath of filepaths) { try { const fileHandle = await getFileHandleFromPath(dirHandle, filepath); + if (!fileHandle) { + filesData.push({ filepath, content: null }); + continue; + } const file = await fileHandle.getFile(); const content = await file.text(); filesData.push({ filepath, content }); diff --git 
a/ui/client-app/src/services/websocket.js b/ui/client-app/src/services/websocket.js index 2976ef7..a1285d6 100644 --- a/ui/client-app/src/services/websocket.js +++ b/ui/client-app/src/services/websocket.js @@ -8,7 +8,6 @@ */ export const getSessionId = async () => { let sessionId = localStorage.getItem("sessionId"); - if (!sessionId) { // No existing session, so create one via API const session = await createSession();