import json
import os
from app.core.pipelines.validator import Validator, TokenLimitExceededError
from app.db import models
from typing import List, Dict, Any, Tuple, Optional, Callable
PROMPT_TEMPLATE = """
### đź§ **Core Directives**
You are a specialized AI assistant for software engineering tasks. Your responses—providing an answer, suggesting a code change, or requesting more files—must be based **exclusively** on the provided codebase content. Your primary goal is to be helpful and accurate while adhering strictly to the following directives.
-----
## 1. Data Analysis and Availability
* **Analyze the User's Request:** Carefully examine the **`question`** and **`chat_history`** to understand what the user wants.
* **Source of Information:** The only information you can use to generate a code-related answer comes from the files provided in the **`retrieved_paths_with_content`** list.
* **File Data & Availability**
* **`retrieved_paths_with_content`**: Files with content available.
* **`retrieved_paths_without_content`**: Files that exist but content is not loaded.
-----
## 2. Decision Logic
You must choose one of three mutually exclusive decisions: `answer`, `code_change`, or `files`.
### `decision='answer'`
* Choose this if you have all necessary info to explain a non-code-modification question.
### `decision='code_change'`
* Choose this for any code manipulation (modify, create, delete).
* Provide a high-level strategy plan in the `answer` field as a numbered list.
* Provide the actual code instructions in a valid JSON list format.
### `decision='files'`
* Request more files from `retrieved_paths_without_content`.
-----
## 3. Output Format
You MUST respond in valid JSON format with the following fields:
- `reasoning`: Your step-by-step logic.
- `decision`: Either 'answer', 'files', or 'code_change'.
- `answer`: Depending on decision (Markdown text, file list, or high-level plan).
- `instructions`: (Only for 'code_change') The JSON list of file operations.
User Question: {question}
Chat History: {chat_history}
Available Content: {retrieved_paths_with_content}
Missing Content: {retrieved_paths_without_content}
Strict JSON Output:"""
class CodeRagQuestionDecider:
    """Asks an LLM to decide how to handle a code-RAG question.

    Given the user question, chat history, and retrieved file data, the LLM
    returns one of three decisions: ``answer`` (explain), ``code_change``
    (plan + file operations), or ``files`` (request more file content).
    """

    def __init__(
        self,
        log_dir: str = "ai_payloads",
        history_formatter: Optional[Callable[[List[models.Message]], str]] = None,
    ):
        """
        Args:
            log_dir: Directory name for AI payload logs. Stored but not read
                by this class — presumably consumed elsewhere; TODO confirm.
            history_formatter: Callable rendering a message list as text.
                Defaults to a simple "Human:/Assistant:" transcript.
        """
        self.log_dir = log_dir
        self.history_formatter = history_formatter or self._default_history_formatter
        self.validator = Validator()

    def _default_history_formatter(self, history: List[models.Message]) -> str:
        """Render each message as '<Role>: <content>', one per line."""
        return "\n".join(
            f"{'Human' if msg.sender == 'user' else 'Assistant'}: {msg.content}"
            for msg in history
        )

    @staticmethod
    def _partition_files(
        files: List[Any],
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Split retrieved file entries into (with_content, without_content).

        Entries that are not dicts, or that have neither a non-empty string
        content nor a file path, are silently skipped.
        """
        with_content: List[Dict[str, Any]] = []
        without_content: List[Dict[str, Any]] = []
        for entry in files:
            if not isinstance(entry, dict):
                continue
            file_path = entry.get("file_path")
            content = entry.get("content")
            if content and isinstance(content, str):
                with_content.append({"file_path": file_path, "content": content})
            elif file_path:
                without_content.append({"file_path": file_path})
        return with_content, without_content

    async def forward(
        self,
        question: str,
        history: List[models.Message],
        retrieved_data: Dict[str, Any],
        llm_provider=None,
    ) -> Tuple[str, str, str, Optional[List[Dict]]]:
        """Run one decision round against the LLM.

        Args:
            question: The user's current question.
            history: Prior chat messages, formatted via ``history_formatter``.
            retrieved_data: Expects a ``"retrieved_files"`` list of dicts with
                ``file_path`` and optional ``content`` keys.
            llm_provider: Object exposing async ``acompletion``; required.

        Returns:
            Tuple of (answer, reasoning, decision, instructions). On a
            malformed LLM response, returns the raw content as the answer
            with decision ``"answer"`` and no instructions.

        Raises:
            ValueError: If no LLM provider is given.
            TokenLimitExceededError: If the assembled prompt is too large
                (propagated from the validator, before the LLM is called).
        """
        # `is None` rather than truthiness: a falsy-but-valid provider object
        # must not be rejected.
        if llm_provider is None:
            raise ValueError("LLM Provider is required.")

        with_content, without_content = self._partition_files(
            retrieved_data.get("retrieved_files", [])
        )

        prompt = PROMPT_TEMPLATE.format(
            question=question,
            chat_history=self.history_formatter(history),
            retrieved_paths_with_content=json.dumps(with_content, indent=2),
            retrieved_paths_without_content=json.dumps(without_content, indent=2),
        )

        # Fail fast before spending an LLM call; TokenLimitExceededError
        # propagates to the caller unchanged (the old try/raise was a no-op).
        self.validator.precheck_tokensize({"prompt": prompt})

        response = await llm_provider.acompletion(
            prompt=prompt, response_format={"type": "json_object"}
        )
        content = response.choices[0].message.content

        try:
            data = json.loads(content)
        except (json.JSONDecodeError, TypeError):
            # TypeError covers content=None; JSONDecodeError covers a model
            # that ignored the JSON-output instruction.
            return content, "Failed to parse JSON", "answer", None

        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. a bare list) — treat as
            # unparseable rather than crashing on .get below.
            return content, "Failed to parse JSON", "answer", None

        answer = data.get("answer", "")
        reasoning = data.get("reasoning", "")
        # str() guards against a model returning a non-string decision.
        decision = str(data.get("decision", "answer")).lower()
        instructions = data.get("instructions")
        return answer, reasoning, decision, instructions