import json
import os
from typing import Any, Callable, Dict, List, Optional, Tuple

import dspy

from app.db import models


class QuestionDecider(dspy.Signature):
    """
    You are a highly specialized AI assistant for software engineering tasks. Your role is to
    analyze a user's request and the provided codebase to decide on the best course of action:
    provide an answer, suggest a code change, or ask for more files. Your decisions must be based
    **strictly and exclusively** on the provided content.

    ---

    ### Core Directives:

    1. **Analyze the Request and Available Data:**
        * Examine the `question` and `chat_history` to understand the user's intent.
        * You are provided with two distinct lists of files: `retrieved_paths_with_content`
          (files you have and can use) and `retrieved_paths_without_content` (files you know
          exist but whose content you would need to request).
        * **Crucial Rule:** The `retrieved_paths_with_content` list is your complete and only
          source of usable code information. Do not mention or refer to any code that is not
          explicitly present in this data.

    2. **Determine File Requirements:**
        * Identify any specific file paths mentioned by the user or required to fulfill the
          request.
        * **Do not re-request files that you already have. "Have" means the requested file path
          already exists in `retrieved_paths_with_content`.**
        * A file is considered "missing" only if its path is not in `retrieved_paths_with_content`
          and is either mentioned in the request or required for a code change. This is the only
          valid reason to choose `decision='files'`. The `retrieved_paths_without_content` list
          helps you identify which files are candidates to request.
        * **Crucial Rule:** If a file path mentioned by the user is **not found** in either
          `retrieved_paths_with_content` or `retrieved_paths_without_content`, you must choose
          the 'answer' decision and explain that the file could not be found. Do not request it.

    3. **Choose the Correct Decision Path:**
        * **Decision: 'answer'**
            * Choose this if you have all the necessary information in
              `retrieved_paths_with_content` to provide a full, complete, and comprehensive
              explanation for a non-code-modification question.
            * Also choose this if the user asks about a file that is not present in any of the
              provided data. You must explain to the user why the file could not be found.
            * The `answer` field must contain a detailed, well-structured explanation in Markdown.
            * The `code_diff` field must be empty.
        * **Decision: 'code_change'**
            * Choose this if the user's request involves modifying or adding to the code
              (e.g., "fix this bug," "implement this feature," "refactor this function,"
              "show me the full code").
            * This decision is also for requests to **generate new code** (e.g., creating a new
              file from scratch). If the user asks for the "full code" of a file that doesn't
              exist, this is a code generation task.
            * You must have all the relevant files with content in `retrieved_paths_with_content`
              to propose the change.
            * The `answer` field can be an optional, high-level summary of the change.
            * The `code_diff` field must contain the full and complete git diff showing the exact
              modifications, including adding new files.
        * **Decision: 'files'**
            * Choose this **only if** you need more files to fulfill the user's request.
            * The `answer` field must be a valid JSON list of strings, with each string being an
              explicit, complete file path that is **found in the `retrieved_paths_without_content`
              list.** **Do not use wildcard characters like `*` or `?`.**
            * The `code_diff` field must be empty.

    4. **Final Output Requirements:**
        * Your output must be a valid JSON object matching the schema.
        * **Crucial Rule:** Do not mention internal system variables or the DSPy signature fields
          (`retrieved_paths_with_content`, `retrieved_paths_without_content`) in the `reasoning`,
          `answer`, or `code_diff` fields. The output should be user-friendly and must not leak
          implementation details.
        * Be helpful, precise, and adhere strictly to these rules. Do not hallucinate file paths
          or content.

    ---
    """

    question = dspy.InputField(desc="The user's current question.")
    chat_history = dspy.InputField(desc="The ongoing dialogue between the user and the AI.")

    # Input fields that make the with-content / without-content split explicit
    retrieved_paths_with_content = dspy.InputField(
        desc="A JSON string of files that have been successfully retrieved with their full content."
    )
    retrieved_paths_without_content = dspy.InputField(
        desc=(
            "A JSON string of files that have been found but whose content has not yet been "
            "loaded (e.g., empty or null content)."
        )
    )

    reasoning = dspy.OutputField(
        desc=(
            "First, determine whether the provided artifacts are sufficient. Then, based on the "
            "question, decide whether the decision type should be 'answer', 'files', or "
            "'code_change'. Finally, analyze the question and determine the output fields."
        )
    )
    decision = dspy.OutputField(
        desc="Must be one of: 'answer', 'files', or 'code_change'."
    )
    code_diff = dspy.OutputField(
        desc=(
            "If `decision` is 'code_change': the full, complete git diff of the proposed changes.\n"
            "Leave this field empty if the decision is not 'code_change'."
        )
    )
    answer = dspy.OutputField(
        desc=(
            "If `decision` is 'answer': a comprehensive, well-structured explanation in Markdown.\n"
            "If `decision` is 'files': a JSON-formatted list of required file paths.\n"
            "If `decision` is 'code_change': an optional, high-level summary of the proposed "
            "changes. Leave empty if no summary is needed."
        )
    )


class CodeRagQuestionDecider(dspy.Module):
    def __init__(
        self,
        log_dir: str = "ai_payloads",
        history_formatter: Optional[Callable[[List[models.Message]], str]] = None,
    ):
        super().__init__()
        self.log_dir = log_dir
        # Chain-of-thought predictor built from the QuestionDecider signature above
        self.decider = dspy.ChainOfThought(QuestionDecider)
        self.history_formatter = history_formatter or self._default_history_formatter

    def _default_history_formatter(self, history: List[models.Message]) -> str:
        return "\n".join(
            f"{'Human' if msg.sender == 'user' else 'Assistant'}: {msg.content}"
            for msg in history
        )

    async def forward(
        self,
        question: str,
        history: List[models.Message],
        retrieved_data: Dict[str, Any],
    ) -> Tuple[str, str, str, str]:
        """
        Runs the decision model with the current user input and code context.

        Args:
            question: The user's query.
            history: The chat history as a list of Message objects.
            retrieved_data: A dictionary whose "retrieved_files" entry is a list of
                dicts with "file_path" and "content" keys.

        Returns:
            A tuple of (answer, reasoning, decision, code_diff).
""" # --- INTERNAL LOGIC TO SPLIT DATA, WITH NULL/POINTER CHECKS --- with_content = [] without_content = [] # Safely access the 'retrieved_files' key, defaulting to an empty list files_to_process = retrieved_data.get("retrieved_files", []) if not isinstance(files_to_process, list): # Fallback for unexpected data format files_to_process = [] for file in files_to_process: # Check if 'file' is not None and is a dictionary if isinstance(file, dict): file_path = file.get("file_path") file_content = file.get("content") # Check if file_content is a non-empty string if file_content and isinstance(file_content, str): with_content.append({"file_path": file_path, "content": file_content}) # Check for a file path without content elif file_path: without_content.append({"file_path": file_path}) # Ensure valid JSON strings for the model input retrieved_with_content_json = json.dumps(with_content, indent=2) retrieved_without_content_json = json.dumps(without_content, indent=2) history_text = self.history_formatter(history) input_payload = { "question": question, "chat_history": history_text, "retrieved_paths_with_content": retrieved_with_content_json, "retrieved_paths_without_content": retrieved_without_content_json, } prediction = await self.decider.acall(**input_payload) # Defensive handling and a clean way to access prediction fields decision = getattr(prediction, "decision", "").lower() answer = getattr(prediction, "answer", "") code_diff = getattr(prediction, "code_diff", "") reasoning = getattr(prediction, "reasoning", "") return answer, reasoning, decision, code_diff