diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index dced2c7..f86a5ef 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -61,7 +61,6 @@ ) logging.basicConfig(level=settings.LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(message)s') - logging.getLogger("dspy").setLevel(logging.DEBUG) # Global settings for LiteLLM to handle provider-specific quirks litellm.drop_params = True diff --git a/ai-hub/app/config.py b/ai-hub/app/config.py index 5b06f56..0b1837b 100644 --- a/ai-hub/app/config.py +++ b/ai-hub/app/config.py @@ -202,8 +202,8 @@ get_from_yaml(["stt_provider", "model_name"]) or \ config_from_pydantic.stt_provider.model_name or "gemini-2.5-flash" self.STT_API_KEY: Optional[str] = os.getenv("STT_API_KEY") or \ - get_from_yaml(["stt_provider", "api_key"]) or \ - self.GEMINI_API_KEY + get_from_yaml(["stt_provider", "api_key"]) or \ + (self.OPENAI_API_KEY if self.STT_PROVIDER == "openai" else self.GEMINI_API_KEY) def save_to_yaml(self): """Saves current settings back to config.yaml.""" diff --git a/ai-hub/app/core/pipelines/code_changer.py b/ai-hub/app/core/pipelines/code_changer.py index 8aed3f8..163c9a0 100644 --- a/ai-hub/app/core/pipelines/code_changer.py +++ b/ai-hub/app/core/pipelines/code_changer.py @@ -1,125 +1,49 @@ -import dspy import json import os from typing import List, Dict, Any, Tuple, Optional, Callable -from app.core.pipelines.validator import Validator,TokenLimitExceededError -class CodeChanger(dspy.Signature): - """ - ### 🧠 Core Directives +PROMPT_TEMPLATE = """ +### 🧠 Core Directives - You are a code generation assistant specialized in producing **one precise and complete code change** per instruction. Your output must be a strict JSON object containing: +You are a code generation assistant specialized in producing **one precise and complete code change** per instruction. Your output must be a strict JSON object containing: - - `reasoning`: A concise explanation of the change. - - `content`: The **full content of the file** (or an empty string for deletions). +- `reasoning`: A concise explanation of the change. +- `content`: The **full content of the file** (or an empty string for deletions). - No extra output is allowed—**only the JSON object.** +--- - --- +### 1. Input Structure - ### 1. Input Structure +- `overall_plan`: {overall_plan} +- `instruction`: {instruction} +- `filepath`: {filepath} +- `original_files`: {original_files} +- `updated_files`: {updated_files} - You will be provided: +----- - - `instruction`: A plain-text directive specifying the desired code change. - - `original_files`: A list of unmodified files from the codebase: - ```json - [ - { - "file_path": "/app/main.py", - "content": "# main.py\\n\\ndef new_function():\\n pass\\n\\nnew_function()\\n" - }, - ... - ] - ```` +### 2. 💻 Code Generation Rules - - `updated_files`: Files already modified in prior steps: +Please provide **one complete and functional code file** per request, for the specified `file_path`. You must output the **entire, modified file**. - +* **Identical Code Sections:** Use the `#[unchanged_section]|||` syntax for large, sequential blocks of code that are not being modified. +* **Complete File Output:** Always provide the **full file contents** in the `content` block. Do not use placeholders like `...`. +* **Imports:** Ensure all required imports are included. - ```json - [ - { - "file_path": "/app/main.py", - "reasoning": "...", - "content": "..." - }, - ... - ] - ``` +--- - ----- +### 3. Output Format - ### 2\. 💻 Code Generation Rules +Return exactly one JSON object: +{{ + "reasoning": "Brief explanation.", + "content": "Full file content" +}} +""" - Please provide **one complete and functional code file** per request, for the specified `file_path`. You must output the **entire, modified file**. - - --- - ### **1. Syntax and Formatting** - - * **Identical Code Sections:** Use the `#[unchanged_section]|||` syntax for large, sequential blocks of code that are not being modified. This is the **only permitted syntax** for indicating unchanged code. Do not use this for small or scattered sections. - * **Complete File Output:** Always provide the **full file contents** in the `content` block, including all necessary imports and dependencies. Do not use placeholders like `...`, or comments such as `# rest of the code`, or `# existing code`. - * **Imports:** Ensure all required imports are included in the provided file. Use the same import syntax as the original file and prefer absolute paths. - * **No Example Execution:** Preserve original line breaks; do not compress output code to a single line, which would interfere with the Git diff. Avoid adding example execution code like a main function or function calls; instead, use comments where possible. - - --- - - ### **2. Quality and Best Practices** - - * **Completeness:** The code you provide must be **self-contained and ready to execute**. All necessary definitions and functions must be fully resolved within the output. - * **Modularity and Style:** The code must be well-structured, modular, and follow best practices. Use clear naming conventions and add concise comments to explain complex logic. - * **Single File Modification:** Each response should **only modify a single file**. Provide the complete, modified file. - * **Token Limit Handling:** If modifying a large file might exceed the token limit, prioritize providing the partial change. Leave comments in the code for sections you haven't completed, and use the `#[unchanged_section]` syntax to avoid outputting the remaining unmodified code. - - #### 🔹 Change Types - - * **File Modification**: - - Provide the entire updated file in `content`. - * **File Creation**: - - Include full file content in `content`. - * **File Deletion**: - - Set `content` to `""` and explain the deletion in `reasoning`. - - ----- - - ### 3\. Output Format - - Return exactly one JSON object: - - ```json - { - "reasoning": "Brief explanation of the change.", - "content": "Full file content here" - } - ``` - - **Do not output any explanation, headers, or text outside this JSON.** - """ - - overall_plan = dspy.InputField(desc="The high-level strategy for the code changes.") - instruction = dspy.InputField(desc="The specific instruction for this step of the code change.") - filepath = dspy.InputField(desc="The path of the file to be changed, created, or deleted.") - - original_files = dspy.InputField( - desc="A JSON list of dictionaries with 'file_path' and 'content' for the original files." - ) - - updated_files = dspy.InputField( - desc="A JSON list of dictionaries with 'file_path' and 'content' for files modified by previous steps." - ) - - reasoning = dspy.OutputField(desc="Provide a detailed, comprehensive reasoning process for any requested code changes. The explanation must clearly justify all modifications. Remind yourselves, you should duplicate unchanged original code to ensure your new code is comprehensive, use `#[unchanged_section]|||` to save output lines.") - content = dspy.OutputField(desc="The generated full usable code without using abbreviations, placeholders, or comments like `code remains the same`") - - -class CodeRagCodeChanger(dspy.Module): - """ - A single-step module to generate code changes based on user instructions and relevant files. - """ - +class CodeRagCodeChanger: def __init__(self): - super().__init__() - self.code_changer = dspy.ChainOfThought(CodeChanger) + pass async def forward( self, @@ -127,15 +51,16 @@ instruction: str, filepath: str, original_files: List[Dict[str, Any]], - updated_files: List[Dict[str, Any]] + updated_files: List[Dict[str, Any]], + llm_provider = None ) -> Tuple[str, str]: - - # Convert dictionaries to JSON strings for the model + if not llm_provider: + raise ValueError("LLM Provider is required.") + original_json = json.dumps(original_files) updated_json = json.dumps(updated_files) - # Generate prediction - prediction = await self.code_changer.acall( + prompt = PROMPT_TEMPLATE.format( overall_plan=overall_plan, instruction=instruction, filepath=filepath, @@ -143,5 +68,11 @@ updated_files=updated_json ) - # Return code diff and reasoning - return prediction.content, prediction.reasoning \ No newline at end of file + response = await llm_provider.acompletion(prompt=prompt, response_format={"type": "json_object"}) + content = response.choices[0].message.content + + try: + data = json.loads(content) + return data.get("content", ""), data.get("reasoning", "") + except json.JSONDecodeError: + return "", f"Failed to parse JSON: {content}" \ No newline at end of file diff --git a/ai-hub/app/core/pipelines/code_reviewer.py b/ai-hub/app/core/pipelines/code_reviewer.py index d416ec0..ef609c7 100644 --- a/ai-hub/app/core/pipelines/code_reviewer.py +++ b/ai-hub/app/core/pipelines/code_reviewer.py @@ -1,198 +1,66 @@ -import dspy import json from typing import List, Dict, Any, Tuple, Optional, Callable -class CodeReviewerSignature(dspy.Signature): - """ - ### 🧠 Core Directives +PROMPT_TEMPLATE = """ +### 🧠 Core Directives +### **Code Review Directives** +Your role is a specialized code review AI. Your primary task is to review a set of code changes and confirm they **fully and accurately address the user's original request**. - ### **Code Review Directives** - Your role is a specialized code review AI. Your primary task is to review a set of code changes and confirm they **fully and accurately address the user's original request**. - --- - ### **Critical Constraints** - Your review is strictly limited to **code content completeness**. Do not suggest or perform any file splits, moves, or large-scale refactoring. - Your sole goal is to ensure the changes are complete. Focus on identifying and resolving any missing logic, placeholders, or incomplete code. Prioritize completion over refactoring. For example, it's better to leave a duplicated function than to aggressively delete code and replace it with a placeholder. - Your proposed modifications must be specific and detailed, avoiding any large-scale refactoring, that is NOT your job. This constraint is critical to prevent endless rework loops. - Pay close attention to any placeholders, comments, or notes (e.g., "to-do," "unchanged," "same as original") that indicate a section of code is incomplete. These areas are your primary focus for completion. - After repeating calling this assistant, caller expects the code completeness can be satisfied quickly. +--- +### **Critical Constraints** +Your review is strictly limited to **code content completeness**. Do not suggest or perform any file splits, moves, or large-scale refactoring. +Identify and resolve any missing logic, placeholders (like "unchanged," "same as original," "to-do"), or incomplete code. - **Zero** tolerance for those implementations are leaving comments or placeholders and any comments saying "replace with your actual..." +Return exactly one JSON object: +{{ + "reasoning": "A detailed explanation of why the decision was made.", + "decision": "Either 'complete' or 'modify'.", + "answer": "If 'complete', an empty string. If 'modify', the new execution plan instructions in JSON." +}} - There is only one exception that you can leave as it is: - There is a system syntax to denote large, sequential blocks of code that are not being modified: - `#[unchanged_section]|||` e.g. #[unchanged_section]|/app/main.py|10|24 +Input: +- `original_question`: {original_question} +- `execution_plan`: {execution_plan} +- `final_code_changes`: {final_code_changes} +- `original_files`: {original_files} +""" - - If the output is satisfactory, you will signal this. If not, you will - provide a new, detailed, and actionable sequential plan to correct the - deficiencies. - - --- - - ### 1. Input Structure - - You will be provided with: - - - `original_question`: The user's initial request. - - `execution_plan`: The step-by-step plan that was previously generated. - - `final_code_changes`: The list of final code files and their content that - resulted from the execution of the previous plans. - - `original_files`: The original, unmodified files for context. - - --- - - ### 2. Decision Logic - - You must choose one of two mutually exclusive decisions: `complete` or `modify`. - - ### `decision='complete'` - - * **When to use:** Choose this if the `final_code_changes` fully and correctly - address the `original_question` and adhere to the `execution_plan`. The - generated code should be bug-free, well-structured, no placeholder, not partial code, and fully **COMPLETE**. - - ### `decision='modify'` - - #### **When to Use** - - You must evaluate `final_code_changes` to determine whether the generated code is fully implemented and meets the user's original request. - - Choose `decision='modify'` if the `final_code_changes` include comments or indications such as: - - * `"same as before"` - * `"most similar code"` - * Or **any other sign that the code is incomplete or partially implemented** - - These phrases typically suggest that the code is unfinished, uses placeholder content, or lacks full implementation. In such cases, the generated code must be revised to: - - * Be fully implemented and functional - * Contain no vague or placeholder comments - * Meet all requirements outlined in the original user request and execution plan - * Be production-ready, with no unfinished areas remaining - - You should also generate a new execution plan that clearly outlines the necessary changes to achieve full and proper implementation. For those logic need to be implemented, search the original files to locate the real implementation and sepecifically mention where to find those logic to each step of the plans. - - ----- - - ### **High-Level Plan** - - The `answer` field must contain a **high-level strategy plan** for the proposed code changes. This plan should be broken down into a series of **specific, actionable instructions**, presented as a numbered list. - - * Each instruction must be a **discrete, testable step**. This ensures the changes are modular and easy to follow. - * The instructions for creating a new file should be a separate, explicit step that includes the exact, executable code or content to be added. Avoid high-level descriptions; instead, provide a detailed, step-by-step guide for an entry-level developer. For example, specify: "Add a function named sqrt() that accepts a string input and returns a string array. Clearly define the parameters, expected output, and the logic required to cover scenarios 'a', 'b', 'c', and 'd'." * Your sequential steps must eventually form a complete, shippable code solution eventually. Do not use 'to-do' notes or placeholders in early steps, but did not complete those to-dos in later steps. Every step should contribute to the final, functional code. - * Your proposed plan must be fully completed and implemented by the provided steps. Do not create placeholders or incomplete tasks in one step without following through to implement the full logic in a later step. - * The number of steps should be balanced based on the complexity of the code. Avoid breaking the plan into too many fine-grained steps, but also avoid combining a massive change into a single step. Aim for a logical, well-paced sequence that can be followed step-by-step. - - **Example Plan Breakdown:** - - * **Plan Breakdown:** - 1. **Complete code in /workspace.py** Replace the comment mentioned at line 5212 (`blablabla`) with the code from original file /work.py from line 142 to 152. - - ----- - - ### **Code Change Instructions Format** - - The response must be a **JSON list of objects**. No other text, fields, or conversational elements are allowed. - - ```json - [ - { - "file_path": "/app/main.py", - "action": "modify", - "change_instruction": "Complete code in /workspace.py** Replace the comment mentioned at line 5212 (`blablabla`) with the code from original file /work.py from line 142 to 152", - "original_files": ["/app/core/services/tts.py", "/app/core/services/stt.py", "/app/main.py"], - "updated_files": ["/app/main.py"] - } - ... - ] - ```` - - ----- - - #### **Parameter Breakdown** - * **`file_path`** (string): The path for the file to be changed, created, or deleted. Must begin with a `/`. - * **New files**: Use a valid, non-existent path. - * **Deletions**: Use the path of the file to be removed. - * **`action`** (string): The operation on the file. Must be one of: `"create"`, `"delete"`, `"move"`, or `"modify"`. - * `"create"`: Creates a new file from scratch. - * `"delete"`: Deletes the entire file. - * `"move"`: This action renames or moves a file to a new path. It does not perform any code changes. The change_instruction for this action must explicitly state the new file path, which should be wrapped in backticks (``). - Example: "change_instruction": "Move the file to `/new/path/file.py`." - * `"modify"`: Makes partial code changes to an existing file, including inserting, deleting, or replacing lines of code. - * **`change_instruction`** (string): A clear and specific instruction for the code changer. - * **New files**: Briefly describe the file's purpose. - * **Deletions**: State the intent to delete the file. - * **`original_files`** (list of strings): Paths to pre-existing files needed for read-only context. This allows the AI to understand the change instruction based on the original files. This list should reference files from `retrieved_paths_with_content`. Use `[]` if no context is needed. Paths must begin with a `/`. - * **`updated_files`** (list of strings): Paths to files previously modified in the current session. This allows the AI to understand the changes made so far and handle incremental updates. Use this for referencing changes from earlier steps. Use `[]` if no previous changes are relevant. Paths must begin with a `/`. - ----- - - **Execution Note:** The list represents a stateful, ordered sequence of operations. Each subsequent step operates on the results of the previous ones. - - * `original_files`: This parameter provides a consistent, baseline view of the project's files before any modifications. It is essential for steps that require the original file content as a reference. - * `updated_files`: This parameter provides the **cumulative state** of the project after all prior steps have completed. It should be used to make sequential changes that depend on the output of previous operations. For stateless or independent operations (e.g., creating a new file from scratch), this parameter is not required. - Try your best to add those two fields if possible. - - - ### 3. Output Format - - Return exactly one JSON object: - - ```json - { - "reasoning": "A detailed explanation of why the decision was made.", - "decision": "Either 'complete' or 'modify'.", - "answer": "If 'complete', an empty string. If 'modify', the new execution plan." - } - ``` - """ - - original_question = dspy.InputField(desc="The user's initial question or request.") - execution_plan = dspy.InputField(desc="The high-level plan that was executed.") - final_code_changes = dspy.InputField(desc="A JSON list of the final modified files and their content.") - original_files = dspy.InputField(desc="A JSON list of the original, unmodified files.") - - reasoning = dspy.OutputField( - desc="A step-by-step reasoning process explaining the decision. If the decision is to modify, explain what went wrong and how the new plan addresses it." - ) - decision = dspy.OutputField( - desc="The decision type for the response. Must be 'complete' or 'modify'." - ) - answer = dspy.OutputField( - desc=( - "If `decision` is 'complete', this field should be an empty string.\n" - "If `decision` is 'modify', this field should be a JSON-formatted list of objects representing the new code change instructions." - ) - ) - -class CodeReviewer(dspy.Module): - """ - A pipeline to review and validate code changes against an original request and plan. - """ - +class CodeReviewer: def __init__(self): - super().__init__() - self.reviewer = dspy.ChainOfThought(CodeReviewerSignature) + pass async def forward( self, original_question: str, execution_plan: str, final_code_changes: List[Dict[str, Any]], - original_files: List[Dict[str, Any]] + original_files: List[Dict[str, Any]], + llm_provider = None ) -> Tuple[str, str, str]: + if not llm_provider: + raise ValueError("LLM Provider is required.") - # Convert dictionaries to JSON strings for the model final_code_changes_json = json.dumps(final_code_changes) original_files_json = json.dumps(original_files) - # Generate the review - prediction = await self.reviewer.acall( + prompt = PROMPT_TEMPLATE.format( original_question=original_question, execution_plan=execution_plan, final_code_changes=final_code_changes_json, original_files=original_files_json ) - # Return the decision, reasoning, and any new plan - return prediction.decision, prediction.reasoning, prediction.answer \ No newline at end of file + response = await llm_provider.acompletion(prompt=prompt, response_format={"type": "json_object"}) + content = response.choices[0].message.content + + try: + data = json.loads(content) + decision = data.get("decision", "modify") + reasoning = data.get("reasoning", "") + answer = data.get("answer", "") + if isinstance(answer, list): + answer = json.dumps(answer) + return decision, reasoning, answer + except json.JSONDecodeError: + return "modify", f"Failed to parse JSON: {content}", "" \ No newline at end of file diff --git a/ai-hub/app/core/pipelines/dspy_rag.py b/ai-hub/app/core/pipelines/dspy_rag.py deleted file mode 100644 index 9942888..0000000 --- a/ai-hub/app/core/pipelines/dspy_rag.py +++ /dev/null @@ -1,83 +0,0 @@ -import dspy -import logging -from typing import List, Callable, Optional -from sqlalchemy.orm import Session - -from app.db import models -from app.core.retrievers.base_retriever import Retriever - - -# --- DSPy Signature Class (No Change) --- -class AnswerWithHistory(dspy.Signature): - """Generate a natural and context-aware answer to the user's question using the provided knowledge and conversation history.""" - - context = dspy.InputField(desc="Relevant excerpts from the knowledge base to support the answer.") - chat_history = dspy.InputField(desc="The ongoing dialogue between the user and the AI, providing conversational context.") - question = dspy.InputField(desc="The user's current question.") - answer = dspy.OutputField(desc="A well-formed answer suitable for delivery in an audio play format.") - - -# --- DSPy RAG Pipeline Class (Updated) --- -class DspyRagPipeline(dspy.Module): - """ - A flexible and extensible DSPy-based RAG pipeline with modular stages. - """ - - def __init__( - self, - # retrievers: List[Retriever], - signature_class: dspy.Signature = AnswerWithHistory, - context_postprocessor: Optional[Callable[[List[str]], str]] = None, - history_formatter: Optional[Callable[[List[models.Message]], str]] = None, - response_postprocessor: Optional[Callable[[str], str]] = None, - ): - super().__init__() - # self.retrievers = retrievers - self.generate_answer = dspy.Predict(signature_class) - - self.context_postprocessor = context_postprocessor or self._default_context_postprocessor - self.history_formatter = history_formatter or self._default_history_formatter - self.response_postprocessor = response_postprocessor - - async def forward(self, question: str, history: List[models.Message], context_chunks :List[str]) -> str: - logging.debug(f"[DspyRagPipeline.forward] Received question: '{question}'") - - # Step 1: Retrieve all document contexts - # context_chunks = [] - # for retriever in self.retrievers: - # context_chunks.extend(retriever.retrieve_context(question, db)) - - context_text = self.context_postprocessor(context_chunks) - - # Step 2: Format history - history_text = self.history_formatter(history) - - # Step 3: Generate response using LLM - # With DSPy and LiteLLM, the signature-based generation handles the prompt building. - # You no longer need to manually build the prompt string. - prediction = await self.generate_answer.aforward( - context=context_text, - chat_history=history_text, - question=question - ) - - raw_response = prediction.answer - - # Step 4: Optional response postprocessing - if self.response_postprocessor: - return self.response_postprocessor(raw_response) - - return raw_response - - # Default context processor: concatenate chunks - def _default_context_postprocessor(self, contexts: List[str]) -> str: - return "\n\n".join(contexts) or "No context provided." - - # Default history formatter: simple speaker prefix - def _default_history_formatter(self, history: List[models.Message]) -> str: - return "\n".join( - f"{'Human' if msg.sender == 'user' else 'Assistant'}: {msg.content}" - for msg in history - ) - -# Note: The _build_prompt method is removed as DSPy handles this automatically. \ No newline at end of file diff --git a/ai-hub/app/core/pipelines/file_selector.py b/ai-hub/app/core/pipelines/file_selector.py index 47ce4ae..9c689cf 100644 --- a/ai-hub/app/core/pipelines/file_selector.py +++ b/ai-hub/app/core/pipelines/file_selector.py @@ -1,58 +1,68 @@ -import dspy import json from app.db import models -from typing import List, Dict, Any,Tuple +from typing import List, Dict, Any, Tuple -class SelectFiles(dspy.Signature): - """ - You're an **expert file navigator** for a large codebase. Your task is to select the most critical and relevant file paths to answer a user's question. Your decision should be based on the **user's current question** and the **ongoing chat history**. All file paths you select must exist within the provided `retrieved_files` list. +PROMPT_TEMPLATE = """ +You're an **expert file navigator** for a large codebase. Your task is to select the most critical and relevant file paths to answer a user's question. All file paths you select must exist within the provided `retrieved_files` list. - --- +--- - ### File Selection Criteria +### File Selection Criteria - 1. **Prioritize Core Files:** Identify and select files that contain the central logic, definitions, or essential configurations directly related to the user's query and the chat history context. - 2. **Be Selective:** To avoid exceeding token limits, your response must be a small, highly focused set of files. Aim for **2 to 4 files**. Do not select a large number of files. - 3. **Exclude Irrelevant and Unreadable Files:** Discard files that are placeholders or have names unrelated to the user's request. **Crucially, use your knowledge to identify and ignore non-text files, such as compiled binaries (.exe, .o), database files (.db, .sqlite), archived files (.zip, .tar), or images (.jpg, .png).** These are not readable source code files and will not help answer the question. - 4. **Infer User Intent:** If the user or chat history mentions a file path that isn't in the `retrieved_files` list, use that as a strong hint. Find and select the path from the list that is most similar to the one mentioned. You **must** only return a file path that exists in the `retrieved_files` list. If you determine no files are related, return an empty array. - 5. **Completeness Check:** If the `retrieved_files` list already contains all the information you need to answer the question, it is acceptable to return an empty array. +1. **Prioritize Core Files:** Identify files that contain the central logic. +2. **Be Selective:** Aim for **2 to 4 files**. +3. **Exclude Irrelevant and Unreadable Files:** Ignore binaries, images, etc. +4. **Infer User Intent:** Return only file paths that exist in the `retrieved_files` list. - --- +--- - ### Output Format +### Output Format - Your **ONLY** output is a parsable JSON array of strings. Do not include any other text, explanations, or markdown. Your response must begin with `[` and end with `]`. Absolutely no other characters are allowed before or after the JSON array. The strings in the array **must** be enclosed in double quotes. - """ - question = dspy.InputField(desc="The user's current question.") - chat_history = dspy.InputField(desc="The ongoing dialogue between the user and the AI.") - question = dspy.InputField(desc="The user's current question.") - retrieved_files = dspy.InputField(desc="A JSON string representing a list of all available file paths.") - answer = dspy.OutputField(format=list, desc="A JSON array of strings. Each string element in the array MUST be enclosed in double quotes.") +Return exactly one JSON array of strings: +[ + "/path/to/file1", + "/path/to/file2" +] -class CodeRagFileSelector(dspy.Module): - """ - A single-step module to select relevant files from a list based on a user question. - """ +Input: +- `question`: {question} +- `chat_history`: {chat_history} +- `retrieved_files`: {retrieved_files} +""" + +class CodeRagFileSelector: def __init__(self): - super().__init__() - # Assign the system prompt directly to the dspy.Predict instance. - self.select_files = dspy.ChainOfThought(SelectFiles) + pass + def _default_history_formatter(self, history: List[models.Message]) -> str: return "\n".join( f"{'Human' if msg.sender == 'user' else 'Assistant'}: {msg.content}" for msg in history ) - async def forward(self, question: str, retrieved_data: List[str], history: List[models.Message]) -> Tuple[List[str], str]: - # Convert the list of strings to a JSON string using json.dumps - # The prompt is now explicitly asking for a JSON array of strings, so you can pass the raw JSON string. + async def forward(self, question: str, retrieved_data: List[str], history: List[models.Message], llm_provider = None) -> Tuple[List[str], str]: + if not llm_provider: + raise ValueError("LLM Provider is required.") + retrieved_json = json.dumps(retrieved_data) - # Call the predictor with the necessary inputs - prediction = await self.select_files.acall( + history_text = self._default_history_formatter(history) + + prompt = PROMPT_TEMPLATE.format( question=question, - chat_history=self._default_history_formatter(history), + chat_history=history_text, retrieved_files=retrieved_json ) - # The prediction.answer should be the list of strings directly as per the output format. - return prediction.answer, prediction.reasoning \ No newline at end of file + response = await llm_provider.acompletion(prompt=prompt, response_format={"type": "json_object"}) + content = response.choices[0].message.content + + try: + data = json.loads(content) + # If the LLM returned a JSON object instead of a list (due to response_format constraint or hallucination) + if isinstance(data, dict) and "files" in data: + return data["files"], "Selected files" + if isinstance(data, list): + return data, "Selected files" + return [], "No files selected" + except json.JSONDecodeError: + return [], f"Failed to parse JSON: {content}" \ No newline at end of file diff --git a/ai-hub/app/core/pipelines/question_decider.py b/ai-hub/app/core/pipelines/question_decider.py index 087aa9b..70f35ad 100644 --- a/ai-hub/app/core/pipelines/question_decider.py +++ b/ai-hub/app/core/pipelines/question_decider.py @@ -1,245 +1,65 @@ -import dspy import json import os -from app.core.pipelines.validator import Validator,TokenLimitExceededError +from app.core.pipelines.validator import Validator, TokenLimitExceededError from app.db import models from typing import List, Dict, Any, Tuple, Optional, Callable -class QuestionDecider(dspy.Signature): - """ +PROMPT_TEMPLATE = """ ### 🧠 **Core Directives** You are a specialized AI assistant for software engineering tasks. Your responses—providing an answer, suggesting a code change, or requesting more files—must be based **exclusively** on the provided codebase content. Your primary goal is to be helpful and accurate while adhering strictly to the following directives. ----- -## 1\. Data Analysis and Availability +## 1. Data Analysis and Availability -This section outlines the process for analyzing user requests and accessing file content to provide a complete and accurate response. Your ability to answer a user's question depends entirely on the data you can access. +* **Analyze the User's Request:** Carefully examine the **`question`** and **`chat_history`** to understand what the user wants. +* **Source of Information:** The only information you can use to generate a code-related answer comes from the files provided in the **`retrieved_paths_with_content`** list. -* **Analyze the User's Request:** Carefully examine the **`question`** and **`chat_history`** to understand what the user wants. This is the most crucial step, as it guides which files you need to retrieve. - -* **Source of Information:** The only information you can use to generate a code-related answer comes from the files provided in the **`retrieved_paths_with_content`** list. You cannot use any information from the **`retrieved_paths_without_content`** list or from any other source. - -* **File Data & Availability ** - -The information you receive is categorized into two mutually exclusive lists. This structure ensures you know exactly which files you can access and which are available to be requested. - -*** - -* **`retrieved_paths_with_content`**: A list of objects, each with a `file_path` and `content` attribute. These are files you have **already requested** and can now use to generate a response or a code change. You cannot request these files again. - - * **Example:** - ```json - [ - { - "file_path": "src/utils/helpers.js", - "content": "// Helper function to format a date...\nexport const formatDate = (date) => { /*...*/ };" - } - ] - ``` - -* **`retrieved_paths_without_content`**: A list of objects, each with a single `file_path` attribute. These files exist in the codebase but have **not yet been requested**. You cannot access their content. The paths in this list are your only valid options for a `files` decision. - - * **Example:** - ```json - [ - {"file_path": "src/components/Button.jsx"}, - {"file_path": "src/styles/theme.css"}, - {"file_path": "tests/unit/helpers.test.js"} - ] - ``` +* **File Data & Availability** +* **`retrieved_paths_with_content`**: Files with content available. +* **`retrieved_paths_without_content`**: Files that exist but content is not loaded. ----- -## 2\. Decision Logic +## 2. Decision Logic You must choose one of three mutually exclusive decisions: `answer`, `code_change`, or `files`. ### `decision='answer'` - - * **When to use:** Choose this if you have all the necessary information in `retrieved_paths_with_content` to provide a complete and comprehensive explanation for a non-code-modification question. - * **Example use cases:** - * "What does this function do?" - * "Explain the architecture of the `app` folder." - * "Why is this test failing?" - * "Which files are involved in the user authentication flow?" - * **Content:** The `answer` field must contain a detailed, well-structured explanation in Markdown. - * **Special Case:** If a user requests a file that is **not** present in either `retrieved_paths_with_content` or `retrieved_paths_without_content`, you **must** choose `answer` and explain that the file could not be found. This rule applies when the user's intent is to modify an existing file. If they are asking you to create a new file, that falls under `code_change`. - ------ +* Choose this if you have all necessary info to explain a non-code-modification question. ### `decision='code_change'` - #### **When to Use** - - Choose this decision when the user's request involves any manipulation of code. This includes: - - * **Modifying existing code**: Fixing a bug, refactoring a function, implementing a new feature within an existing file. - * **Creating new code**: Generating a new file from scratch (e.g., "create a new file for a utility function"). - * **Removing code**: Deleting a file or a block of code. - * **Displaying code**: Responding to requests like "show me the full code for..." or "update this file." - - \***Pre-conditions**: You must have all the relevant files with content in `retrieved_paths_with_content` to propose the change. - - ----- - - ### **High-Level Plan** - - The `answer` field must contain a **high-level strategy plan** for the proposed code changes. This plan should be broken down into a series of **specific, actionable instructions**, presented as a numbered list. - - * Each instruction must be a **discrete, testable step**. This ensures the changes are modular and easy to follow. - * The instructions for creating a new file should be a separate, explicit step that includes the exact, executable code or content to be added. Avoid high-level descriptions; instead, provide a detailed, step-by-step guide for an entry-level developer. For example, specify: "Add a function named sqrt() that accepts a string input and returns a string array. Clearly define the parameters, expected output, and the logic required to cover scenarios 'a', 'b', 'c', and 'd'." * Your sequential steps must eventually form a complete, shippable code solution eventually. Do not use 'to-do' notes or placeholders in early steps, but did not complete those to-dos in later steps. Every step should contribute to the final, functional code. - * Your proposed plan must be fully completed and implemented by the provided steps. Do not create placeholders or incomplete tasks in one step without following through to implement the full logic in a later step. - * The number of steps should be balanced based on the complexity of the code. Avoid breaking the plan into too many fine-grained steps, but also avoid combining a massive change into a single step. Aim for a logical, well-paced sequence that can be followed step-by-step. - - **Example Plan Breakdown:** - - * **User Request:** "Refactor the `create_app` function to improve readability and maintainability by breaking it into smaller helper functions." - * **Plan Breakdown:** - 1. **Extract initialization logic:** Create a new function `initialize_database` to handle all database setup. - 2. **Modularize middleware:** Implement a `setup_middlewares` function to handle all middleware configurations. - 3. **Group route registration:** Create a new function `register_routes` to modularize all route registrations. - 4. **Isolate error handling:** Implement a dedicated function `setup_error_handling` to set up error handling logic. - 5. **Update the main function:** Modify the `create_app` function to call these new helper functions in the correct sequence. - - ----- - - ### **Code Change Instructions Format** - - The response must be a **JSON list of objects**. No other text, fields, or conversational elements are allowed. - - ```json - [ - { - "file_path": "/app/main.py", - "action": "modify", - "change_instruction": "Refactor the create_app function to improve readability and maintainability by breaking it into smaller helper functions.", - "original_files": ["/app/core/services/tts.py", "/app/core/services/stt.py", "/app/main.py"], - "updated_files": ["/app/main.py"] - } - ... - ] - ```` - - ----- - - #### **Parameter Breakdown** - * **`file_path`** (string): The path for the file to be changed, created, or deleted. Must begin with a `/`. - * **New files**: Use a valid, non-existent path. - * **Deletions**: Use the path of the file to be removed. - * **`action`** (string): The operation on the file. Must be one of: `"create"`, `"delete"`, `"move"`, or `"modify"`. - * `"create"`: Creates a new file from scratch. - * `"delete"`: Deletes the entire file. - * `"move"`: This action renames or moves a file to a new path. It does not perform any code changes. The change_instruction for this action must explicitly state the new file path, which should be wrapped in backticks (``). - Example: "change_instruction": "Move the file to `/new/path/file.py`." - * `"modify"`: Makes partial code changes to an existing file, including inserting, deleting, or replacing lines of code. - * **`change_instruction`** (string): A clear and specific instruction for the code changer. - * **New files**: Briefly describe the file's purpose. - * **Deletions**: State the intent to delete the file. - * **`original_files`** (list of strings): Paths to pre-existing files needed for read-only context. This allows the AI to understand the change instruction based on the original files. This list should reference files from `retrieved_paths_with_content`. Use `[]` if no context is needed. Paths must begin with a `/`. - * **`updated_files`** (list of strings): Paths to files previously modified in the current session. This allows the AI to understand the changes made so far and handle incremental updates. Use this for referencing changes from earlier steps. Use `[]` if no previous changes are relevant. Paths must begin with a `/`. - ----- - - **Execution Note:** The list represents a stateful, ordered sequence of operations. Each subsequent step operates on the results of the previous ones. - - * `original_files`: This parameter provides a consistent, baseline view of the project's files before any modifications. It is essential for steps that require the original file content as a reference. - * `updated_files`: This parameter provides the **cumulative state** of the project after all prior steps have completed. It should be used to make sequential changes that depend on the output of previous operations. For stateless or independent operations (e.g., creating a new file from scratch), this parameter is not required. - Try your best to add those two fields if possible. - - #### **Operational Constraints** - The format for each step is limited to modifying a single file's content at a time. This means that a single operation, such as moving code between two different files, is not possible. - Instead, you must handle this type of change as a two-part process: - 1. **Replicate** the code in the new file as one step. - 2. **Delete** the original code from the source file in a subsequent step. - This approach circumvents the single-file limitation and allows for multi-file changes. - - ### **Best Practices** - * **Prioritize Creation and Addition Steps First:** Always put adding new code and logic before steps of removing modifying or refactoring existing code. This approach ensures that you don't accidentally lose functionality during a change. - * **Be Conservative with Deletions:** Avoid deleting large blocks of code unless you are absolutely certain they are no longer needed. Mass deletion can be risky and is often a sign of an incomplete understanding of the codebase. - * **Consolidate Gradually:** While code consolidation is a good goal, it's best to do it in small, incremental steps. An overly aggressive approach can be difficult to review and may lead to unexpected bugs. A gradual, measured approach is more likely to be accepted by the team and result in a more stable codebase. +* Choose this for any code manipulation (modify, create, delete). +* Provide a high-level strategy plan in the `answer` field as a numbered list. +* Provide the actual code instructions in a valid JSON list format. ### `decision='files'` - When more files are needed to fulfill the user's request, use this decision to retrieve them. This decision is suitable for a subset of files. - - The files you request **must** be present in the `retrieved_paths_without_content` list. **Do not** request files that are already in the `retrieved_paths_with_content` list. - - **Request a small, focused set of files** (typically 2-4). - **Analyze the fetched content** (which will appear in `retrieved_paths_with_content`), ensure any of files won't be requested again. - **Repeat** Requesting more files that ONLY exit in `retrieved_paths_without_content` if necessary. - - When the `files` decision is chosen, your response must be a **JSON list of strings**. Each string should be a complete, explicit file path. - The response must be a pure JSON array containing only the file paths you want to retrieve. Do not include any nested objects, additional keys, or conversational text. - - * **Example:** - [ - "/app/core/services/tts.py", - "/app/core/services/stt.py", - "/app/main.py" - ] - - **Constraints & Selection Criteria:** - - * **Format**: The JSON must contain only file paths. Do not include any other text, wildcards, or conversational elements. - * **Path Requirements**: Every file path must begin with a `/`. **Do not** include any paths not present in the `retrieved_paths_without_content` list. - * **Relevance**: Prioritize files that contain the core logic most relevant to the user's query. - * **Efficiency**: To avoid exceeding token limits, be highly selective. Request a small, focused set of **2-4 files**. - * **Allowed File Types**: The system is permitted to request files only if they fall into one of the following two categories: - 1. **Common Code and Text Files**: Files with standard suffixes for coding, configuration, and text content (e.g., `.js`, `.ts`, `.html`, `.css`, `.json`, `.md`, `.txt`, `.py`, `.sh`). - 2. **Explicitly Requested Text Files**: Any file, regardless of its suffix, that the user has explicitly requested and is confirmed to contain only pure string content. - * **Exclusions**: **Do not** request any binary files or other non-text formats (e.g., `.exe`, `.db`, `.zip`, `.jpg`, `.mp4`). - -Both of these options allow for the flexibility you need while maintaining a clear and secure policy against accessing non-text files. Option 2 is a bit more formal and might be better suited for a detailed policy document. * **Inference**: If the user's request or chat history references a specific file, use that as a strong hint to find the most similar path in the list. - * **Proactive Planning**: If the user's request implies a code change but file content is missing, proactively request the files you anticipate needing to successfully and correctly generate a code plan. This is your only opportunity to retrieve these files. - * **Redundancy**: **Do not** re-request files that are already available in `retrieved_paths_with_content`. - - **Other Tips:** - * Your decision-making process for **`code_change`** must include an evaluation of the user's request in the context of the codebase's size and complexity. - * If you've already requested multiple files and still find the information insufficient to fulfill the user's request, **narrow the scope of the question** based on the files you currently have. It’s okay if your response does not cover *all* necessary code changes—just make sure to explain this clearly in the reasoning. - * **Do NOT repeatedly or indefinitely request more files.** Be proactive in working with what is already available. - * If the request is for a **general code change** (e.g., refactoring the entire project) and the **codebase is small**, providing a `code_change` is a reasonable decision. - * If the request is too broad and the codebase is **large or complex** (as determined by `retrieved_paths_with_content`), you should **avoid** choosing `code_change`. Instead, guide the user to narrow the scope of their request before proceeding. +* Request more files from `retrieved_paths_without_content`. ----- -## 3\. Final Output Requirements +## 3. Output Format - * **Strict Structure:** Your output must strictly adhere to the specified JSON format. - * **No Internal Leaks:** Do not mention internal system variables or the DSPy signature fields (`retrieved_paths_with_content`, `retrieved_paths_without_content`) in your reasoning or answer fields. The output should be user-friendly. - * **Precision:** Be helpful, precise, and adhere strictly to these rules. Do not hallucinate file paths or content. - """ - question = dspy.InputField(desc="The user's current question.") - chat_history = dspy.InputField(desc="The ongoing dialogue between the user and the AI.") - # New Input Fields to make the data split explicit - retrieved_paths_with_content = dspy.InputField(desc="A JSON list of file paths with their full content available.") - retrieved_paths_without_content = dspy.InputField(desc="A JSON list of file paths that exist but are not yet loaded.") +You MUST respond in valid JSON format with the following fields: +- `reasoning`: Your step-by-step logic. +- `decision`: Either 'answer', 'files', or 'code_change'. +- `answer`: Depending on decision (Markdown text, file list, or high-level plan). +- `instructions`: (Only for 'code_change') The JSON list of file operations. - reasoning = dspy.OutputField( - desc="Step-by-step reasoning that explains the chosen `decision` and prepares the final output. This should include an analysis of the user's intent, the availability of required files, and the rationale behind the decision. If the decision involves using files, clearly state which files are already available, which additional files are needed, and why." - ) +User Question: {question} +Chat History: {chat_history} +Available Content: {retrieved_paths_with_content} +Missing Content: {retrieved_paths_without_content} - decision = dspy.OutputField( - desc="The decision type for the response. Must be one of: 'answer', 'files', or 'code_change'." - ) +Strict JSON Output:""" - answer = dspy.OutputField( - desc=( - "If `decision` is 'answer': a comprehensive, well-structured explanation in Markdown.\n" - "If `decision` is 'files': a JSON-formatted list of file paths to retrieve.\n" - "If `decision` is 'code_change': a high-level strategy plan for the proposed code changes." - ) - ) - -class CodeRagQuestionDecider(dspy.Module): - +class CodeRagQuestionDecider: def __init__(self, log_dir: str = "ai_payloads", history_formatter: Optional[Callable[[List[models.Message]], str]] = None): - super().__init__() self.log_dir = log_dir - # Initializes the dspy Predict module with the refined system prompt - self.decider = dspy.ChainOfThought(QuestionDecider) self.history_formatter = history_formatter or self._default_history_formatter self.validator = Validator() - def _default_history_formatter(self, history: List[models.Message]) -> str: return "\n".join( f"{'Human' if msg.sender == 'user' else 'Assistant'}: {msg.content}" @@ -249,65 +69,51 @@ async def forward( self, question: str, - history: List[models.Message], - retrieved_data: Dict[str, Any] - ) -> Tuple[str, str, str]: - """ - Runs the decision model with the current user input and code context. + history: List[models.Message], + retrieved_data: Dict[str, Any], + llm_provider = None + ) -> Tuple[str, str, str, Optional[List[Dict]]]: + if not llm_provider: + raise ValueError("LLM Provider is required.") - Args: - question: The user's query. - history: The chat history as a list of strings. - retrieved_data: A dictionary mapping file paths to file contents. - - Returns: - A tuple of (answer, decision). - """ - - # --- INTERNAL LOGIC TO SPLIT DATA, WITH NULL/POINTER CHECKS --- with_content = [] without_content = [] - - # Safely access the 'retrieved_files' key, defaulting to an empty list files_to_process = retrieved_data.get("retrieved_files", []) - if not isinstance(files_to_process, list): - # Fallback for unexpected data format - files_to_process = [] for file in files_to_process: - # Check if 'file' is not None and is a dictionary if isinstance(file, dict): file_path = file.get("file_path") file_content = file.get("content") - - # Check if file_content is a non-empty string if file_content and isinstance(file_content, str): with_content.append({"file_path": file_path, "content": file_content}) - # Check for a file path without content elif file_path: without_content.append({"file_path": file_path}) - # Ensure valid JSON strings for the model input - retrieved_with_content_json = json.dumps(with_content, indent=2) - retrieved_without_content_json = json.dumps(without_content, indent=2) - history_text = self.history_formatter(history) - input_payload = { - "question": question, - "chat_history": history_text, - "retrieved_paths_with_content": retrieved_with_content_json, - "retrieved_paths_without_content": retrieved_without_content_json, - } + + prompt = PROMPT_TEMPLATE.format( + question=question, + chat_history=history_text, + retrieved_paths_with_content=json.dumps(with_content, indent=2), + retrieved_paths_without_content=json.dumps(without_content, indent=2) + ) try: - self.validator.precheck_tokensize(input_payload) + self.validator.precheck_tokensize({"prompt": prompt}) except TokenLimitExceededError as e: raise e - prediction = await self.decider.acall(**input_payload) - - # Defensive handling and a clean way to access prediction fields - decision = getattr(prediction, "decision", "").lower() - answer = getattr(prediction, "answer", "") - reasoning = getattr(prediction, "reasoning", "") - return answer, reasoning, decision \ No newline at end of file + # Call LLM + response = await llm_provider.acompletion(prompt=prompt, response_format={"type": "json_object"}) + content = response.choices[0].message.content + + try: + data = json.loads(content) + answer = data.get("answer", "") + reasoning = data.get("reasoning", "") + decision = data.get("decision", "answer").lower() + instructions = data.get("instructions") + return answer, reasoning, decision, instructions + except json.JSONDecodeError: + # Fallback if LLM fails to provide valid JSON despite instructions + return content, "Failed to parse JSON", "answer", None \ No newline at end of file diff --git a/ai-hub/app/core/pipelines/rag_pipeline.py b/ai-hub/app/core/pipelines/rag_pipeline.py new file mode 100644 index 0000000..f362356 --- /dev/null +++ b/ai-hub/app/core/pipelines/rag_pipeline.py @@ -0,0 +1,65 @@ +import logging +from typing import List, Callable, Optional +from sqlalchemy.orm import Session + +from app.db import models + +class RagPipeline: + """ + A flexible and extensible RAG pipeline updated to remove DSPy dependency. + """ + + def __init__( + self, + context_postprocessor: Optional[Callable[[List[str]], str]] = None, + history_formatter: Optional[Callable[[List[models.Message]], str]] = None, + response_postprocessor: Optional[Callable[[str], str]] = None, + ): + self.context_postprocessor = context_postprocessor or self._default_context_postprocessor + self.history_formatter = history_formatter or self._default_history_formatter + self.response_postprocessor = response_postprocessor + + async def forward(self, question: str, history: List[models.Message], context_chunks: List[str], llm_provider=None) -> str: + logging.debug(f"[RagPipeline.forward] Received question: '{question}'") + + context_text = self.context_postprocessor(context_chunks) + history_text = self.history_formatter(history) + + # Step 3: Generate response using manual prompt + prompt = self._build_prompt(context_text, history_text, question) + + if not llm_provider: + raise ValueError("LLM Provider is required for RAG pipeline.") + + prediction = await llm_provider.acompletion(prompt=prompt) + raw_response = prediction.choices[0].message.content + + # Step 4: Optional response postprocessing + if self.response_postprocessor: + return self.response_postprocessor(raw_response) + + return raw_response + + def _build_prompt(self, context, history, question): + return f"""Generate a natural and context-aware answer to the user's question using the provided knowledge and conversation history. + +Relevant excerpts from the knowledge base: +{context} + +Conversation History: +{history} + +User Question: {question} + +Answer:""" + + # Default context processor: concatenate chunks + def _default_context_postprocessor(self, contexts: List[str]) -> str: + return "\n\n".join(contexts) or "No context provided." + + # Default history formatter: simple speaker prefix + def _default_history_formatter(self, history: List[models.Message]) -> str: + return "\n".join( + f"{'Human' if msg.sender == 'user' else 'Assistant'}: {msg.content}" + for msg in history + ) \ No newline at end of file diff --git a/ai-hub/app/core/pipelines/utils.py b/ai-hub/app/core/pipelines/utils.py index 0a60eb1..8cb86bb 100644 --- a/ai-hub/app/core/pipelines/utils.py +++ b/ai-hub/app/core/pipelines/utils.py @@ -1,42 +1,11 @@ -import dspy import json import os from datetime import datetime -def log_dspy_history_to_file() -> None: - log_dir = "ai_payloads" - """ - Logs only the output of dspy.inspect_history(n=1) to a timestamped JSON file. - """ +def log_status(msg: str) -> None: + log_dir = "ai_logs" os.makedirs(log_dir, exist_ok=True) - timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - filename = os.path.join(log_dir, f"dspy_history_{timestamp}.json") - - # Capture the DSPy history by redirecting stdout - from io import StringIO - import sys - - # Create a string buffer to capture the output - old_stdout = sys.stdout - sys.stdout = history_capture = StringIO() - - # Inspect the last interaction - dspy.inspect_history(n=1) - - # Restore stdout - sys.stdout = old_stdout - - # Get the captured history and clean it up - dspy_history = history_capture.getvalue().strip() - - log_data = { - "dspy_history": dspy_history - } - - try: - with open(filename, "w", encoding="utf-8") as f: - json.dump(log_data, f, indent=4) - print(f"[LOG] DSPy history saved to {filename}") - except IOError as e: - print(f"[ERROR] Failed to write log file {filename}: {e}") \ No newline at end of file + filename = os.path.join(log_dir, f"status_{timestamp}.txt") + with open(filename, "w", encoding="utf-8") as f: + f.write(msg) \ No newline at end of file diff --git a/ai-hub/app/core/providers/factory.py b/ai-hub/app/core/providers/factory.py index 4d1b53c..b304abb 100644 --- a/ai-hub/app/core/providers/factory.py +++ b/ai-hub/app/core/providers/factory.py @@ -6,7 +6,6 @@ from .tts.gemini import GeminiTTSProvider from .tts.gcloud_tts import GCloudTTSProvider from .stt.gemini import GoogleSTTProvider -from dspy.clients.base_lm import BaseLM from openai import AsyncOpenAI import litellm @@ -62,7 +61,7 @@ return list(_stt_registry.keys()) # --- 3. The Factory Functions --- -def get_llm_provider(provider_name: str, model_name: str = "", system_prompt: str = None, api_key_override: str = None, **kwargs) -> BaseLM: +def get_llm_provider(provider_name: str, model_name: str = "", system_prompt: str = None, api_key_override: str = None, **kwargs) -> GeneralProvider: """Factory function to get the appropriate, pre-configured LLM provider, with optional system prompt.""" # helper for masked/null keys diff --git a/ai-hub/app/core/providers/llm/general.py b/ai-hub/app/core/providers/llm/general.py index 179232b..9424eb3 100644 --- a/ai-hub/app/core/providers/llm/general.py +++ b/ai-hub/app/core/providers/llm/general.py @@ -1,27 +1,16 @@ import litellm -from dspy.clients.base_lm import BaseLM -class GeneralProvider(BaseLM): +class GeneralProvider: def __init__(self, model_name: str, api_key: str, system_prompt: str = None, **kwargs): self.model_name = model_name self.api_key = api_key self.system_prompt = system_prompt + self.kwargs = kwargs # Validate API Key early if not api_key or "*" in str(api_key): raise ValueError(f"Invalid or missing API key for LLM provider '{model_name}'. Please configure it in Settings.") - # Determine max tokens dynamically via LiteLLM info - max_tokens = 8000 - try: - info = litellm.get_model_info(model_name) - if info and "max_tokens" in info: - max_tokens = info["max_tokens"] - except: - pass - - super().__init__(model=model_name, max_tokens=max_tokens, **kwargs) - def _prepare_messages(self, prompt=None, messages=None): """Helper to prepare the messages list, including the system prompt.""" if messages is None: @@ -34,34 +23,10 @@ return messages - def forward(self, prompt=None, messages=None, **kwargs): + async def acompletion(self, prompt=None, messages=None, **kwargs): """ - Synchronous forward pass using LiteLLM. + Asynchronous completion pass using LiteLLM. """ - # Use the helper to prepare messages - prepared_messages = self._prepare_messages(prompt=prompt, messages=messages) - - request = { - "model": self.model_name, - "messages": prepared_messages, - "api_key": self.api_key, - **self.kwargs, - **kwargs, - } - try: - return litellm.completion(**request) - except Exception as e: - # Distinguish between network errors and missing credits - err_msg = str(e) - if "authentication" in err_msg.lower() or "401" in err_msg: - raise RuntimeError(f"Authentication failed for {self.model_name}. Check your API key.") - raise RuntimeError(f"LiteLLM Error ({self.model_name}): {err_msg}") - - async def aforward(self, prompt=None, messages=None, **kwargs): - """ - Asynchronous forward pass using LiteLLM. - """ - # Use the helper to prepare messages prepared_messages = self._prepare_messages(prompt=prompt, messages=messages) request = { diff --git a/ai-hub/app/core/services/rag.py b/ai-hub/app/core/services/rag.py index 9af3909..75d9fec 100644 --- a/ai-hub/app/core/services/rag.py +++ b/ai-hub/app/core/services/rag.py @@ -1,13 +1,11 @@ -import asyncio from typing import List, Tuple from sqlalchemy.orm import Session, joinedload -import dspy from app.db import models from app.core.retrievers.faiss_db_retriever import FaissDBRetriever from app.core.retrievers.base_retriever import Retriever from app.core.providers.factory import get_llm_provider -from app.core.pipelines.dspy_rag import DspyRagPipeline +from app.core.pipelines.rag_pipeline import RagPipeline class RAGService: """ @@ -88,14 +86,14 @@ else: print("Warning: FaissDBRetriever requested but not available. Proceeding without it.") - rag_pipeline = DspyRagPipeline() + rag_pipeline = RagPipeline() - with dspy.context(lm=llm_provider): - answer_text = await rag_pipeline.forward( - question=prompt, - history=session.messages, - context_chunks = context_chunks - ) + answer_text = await rag_pipeline.forward( + question=prompt, + history=session.messages, + context_chunks = context_chunks, + llm_provider = llm_provider + ) # Save assistant's response assistant_message = models.Message(session_id=session_id, sender="assistant", content=answer_text) diff --git a/ai-hub/app/core/services/utils/code_change.py b/ai-hub/app/core/services/utils/code_change.py index 32a7b2e..bf3acc0 100644 --- a/ai-hub/app/core/services/utils/code_change.py +++ b/ai-hub/app/core/services/utils/code_change.py @@ -1,7 +1,6 @@ import logging import json import re -import dspy import uuid import asyncio from sqlalchemy.orm import Session @@ -119,14 +118,14 @@ file_path_to_change = item.get("file_path") overall_plan = self.input_data - with dspy.context(lm=self.llm_provider): - content, reasoning = await self.code_changer.forward( - overall_plan=overall_plan, - instruction=instruction, - filepath=file_path_to_change, - original_files=original_files_list, - updated_files=updated_files_list - ) + content, reasoning = await self.code_changer.forward( + overall_plan=overall_plan, + instruction=instruction, + filepath=file_path_to_change, + original_files=original_files_list, + updated_files=updated_files_list, + llm_provider=self.llm_provider + ) return {"content": content, "reasoning": reasoning} async def _handle_thinking_log(self, websocket: WebSocket, reasoning :str): @@ -194,13 +193,13 @@ await websocket.send_text(payload) async def _review_changes(self, final_code_changes: List[Dict[str, Any]]) -> Tuple[str, str, str]: - with dspy.context(lm=self.llm_provider): - decision, reasoning, answer = await self.code_reviewer.forward( - original_question=self.original_question, - execution_plan= json.dumps(self.history_plans), - final_code_changes=final_code_changes, - original_files=[{"file_path": k, "content": v} for k, v in self.original_files.items()] - ) + decision, reasoning, answer = await self.code_reviewer.forward( + original_question=self.original_question, + execution_plan= json.dumps(self.history_plans), + final_code_changes=final_code_changes, + original_files=[{"file_path": k, "content": v} for k, v in self.original_files.items()], + llm_provider=self.llm_provider + ) return decision, reasoning, answer async def _inline_code_replacement(self, input_text: str) -> str: @@ -277,7 +276,6 @@ response["content"] = await self._inline_code_replacement(response.pop("content")) self.updated_files[filepath] = response reasoning = response.get("reasoning", "") - # dspy.inspect_history(n=1) if reasoning: await self._handle_thinking_log(websocket, reasoning) diff --git a/ai-hub/app/core/services/workspace.py b/ai-hub/app/core/services/workspace.py index 3ced008..4d53025 100644 --- a/ai-hub/app/core/services/workspace.py +++ b/ai-hub/app/core/services/workspace.py @@ -1,4 +1,3 @@ -import dspy import json import uuid import os @@ -14,7 +13,7 @@ from app.db.session import SessionLocal from app.core.providers.factory import get_llm_provider # from app.core.pipelines.file_selector import CodeRagFileSelector -from app.core.pipelines.dspy_rag import DspyRagPipeline +from app.core.pipelines.rag_pipeline import RagPipeline from app.core.pipelines.question_decider import CodeRagQuestionDecider from app.core.services.utils.code_change import CodeChangeHelper from app.core.pipelines.validator import Validator,TokenLimitExceededError @@ -520,54 +519,6 @@ await self._store_retrieved_files(request_id=uuid.UUID(request_id), files=files) await self.handle_files_content_response(websocket, {"files": [], "request_id": request_id, "session_id": file_request.session_id}) - # session = self.db.query(models.Session).options( - # joinedload(models.Session.messages) - # ).filter(models.Session.id == file_request.session_id).first() - - # provider_name = data.get("provider_name", "gemini") - # llm_provider = get_llm_provider(provider_name) - # cfs = CodeRagFileSelector() - # retrieved_data = await self._retrieve_by_request_id(self.db, request_id=request_id) - # with dspy.context(lm=llm_provider): - # raw_answer_text ,reasoning = await cfs( - # question=file_request.question, - # retrieved_data=retrieved_data, - # history=session.messages - # ) - # assistant_message = models.Message(session_id=file_request.session_id, sender="assistant", content=f'${reasoning}: ${raw_answer_text}') - # self.db.add(assistant_message) - # self.db.commit() - # dspy.inspect_history(n=1) # Inspect the last DSPy operation for debugging - - # try: - # # Use ast.literal_eval for a safe and reliable parse - # answer_text = ast.literal_eval(raw_answer_text) - # except (ValueError, SyntaxError) as e: - # # Handle cases where the LLM output is not a valid list string. - # print(f"Error parsing LLM output: {e}") - # answer_text = [] # Default to an empty list to prevent errors. - # await websocket.send_text(json.dumps({ - # "type": "thinking_log", - # "content": f"Warning: AI's file list could not be parsed. Error: {e}" - # })) - # return - - # if len(answer_text) == 0: - # await websocket.send_text(json.dumps({ - # "type": "thinking_log", - # "content": "AI did not select any files to retrieve content for." - # })) - # await self.handle_files_content_response(websocket, {"files": [], "request_id": request_id, "session_id": file_request.session_id}) - # return - - # await websocket.send_text(json.dumps({ - # "type": "thinking_log", - # "content": f"AI selected files: {answer_text}. Now requesting file content." - # })) - - # # After getting the AI's selected files, we send a command to the client to get their content. - # await self.send_command(websocket, "get_file_content", data={"filepaths": answer_text, "request_id": request_id}) - async def handle_files_content_response(self, websocket: WebSocket, data: Dict[str, Any]): """ Handles the content of a list of files sent by the client. @@ -617,22 +568,22 @@ ).filter(models.Session.id == session_id).first() # Use the LLM to make a decision - with dspy.context(lm=get_llm_provider(provider_name="gemini")): - crqd = CodeRagQuestionDecider() - original_question= context_data.get("question", "") - try: - raw_answer_text, reasoning, decision = await crqd( - question=original_question, - history=session.messages, - retrieved_data=context_data - ) - except ValueError as e: - await websocket.send_text(json.dumps({ - "type": "error", - "content": f"Failed to process AI decision request. Error: {e}" - })) - return - # dspy.inspect_history(n=1) + llm_provider = get_llm_provider(provider_name="gemini") + crqd = CodeRagQuestionDecider() + original_question = context_data.get("question", "") + try: + raw_answer_text, reasoning, decision, instructions = await crqd.forward( + question=original_question, + history=session.messages, + retrieved_data=context_data, + llm_provider=llm_provider + ) + except ValueError as e: + await websocket.send_text(json.dumps({ + "type": "error", + "content": f"Failed to process AI decision request. Error: {e}" + })) + return if decision == "answer": # Handle regular answer @@ -653,16 +604,19 @@ "content": f"AI decided files are needed: {raw_answer_text}." })) try: - json_match = re.search(r'\[.*\]', raw_answer_text, re.DOTALL) - if json_match: - json_string = json_match.group(0) - answer_text = ast.literal_eval(json_string) - if not isinstance(answer_text, list): - raise ValueError("Parsed result is not a list.") + # raw_answer_text might be a JSON string or a list from the LLM + if isinstance(raw_answer_text, list): + answer_text = raw_answer_text else: - answer_text = ast.literal_eval(raw_answer_text) - if not isinstance(answer_text, list): - raise ValueError("Parsed result is not a list.") + json_match = re.search(r'\[.*\]', raw_answer_text, re.DOTALL) + if json_match: + json_string = json_match.group(0) + answer_text = ast.literal_eval(json_string) + else: + answer_text = ast.literal_eval(raw_answer_text) + + if not isinstance(answer_text, list): + raise ValueError("Parsed result is not a list.") except (ValueError, SyntaxError) as e: print(f"Error parsing LLM output: {e}") answer_text = [] @@ -685,11 +639,19 @@ })) try: - # The input_data is a JSON string of code change instructions - cch = CodeChangeHelper(db=self.db, provider_name="gemini", original_question=original_question, input_data=raw_answer_text, reasoning = reasoning,request_id= uuid.UUID(request_id)) + # The input_data should be a JSON string of code change instructions + input_data = json.dumps(instructions) if instructions else "[]" + cch = CodeChangeHelper( + db=self.db, + provider_name="gemini", + original_question=original_question, + input_data=input_data, + reasoning=reasoning, + request_id=uuid.UUID(request_id) + ) # Use the CodeChangeHelper to process all code changes - final_changes = await cch.process(websocket=websocket) + await cch.process(websocket=websocket) except (json.JSONDecodeError, ValueError) as e: logger.error(f"Error processing code changes: {e}") @@ -782,10 +744,14 @@ model_name=model_name_override, api_key_override=api_key_override ) - chat = DspyRagPipeline() - with dspy.context(lm=llm_provider): - answer_text = await chat(question=prompt, history=session.messages, context_chunks=[]) - # Save assistant's response + chat = RagPipeline() + answer_text = await chat.forward( + question=prompt, + history=session.messages, + context_chunks=[], + llm_provider=llm_provider + ) + # Save assistant's response assistant_message = models.Message(session_id=session_id, sender="assistant", content=answer_text) self.db.add(assistant_message) self.db.commit() diff --git a/ai-hub/requirements.txt b/ai-hub/requirements.txt index 37a2533..5f71a02 100644 --- a/ai-hub/requirements.txt +++ b/ai-hub/requirements.txt @@ -14,8 +14,9 @@ pytest-mock numpy faiss-cpu -dspy aioresponses python-multipart PyJWT -tenacity \ No newline at end of file +tenacity +litellm +tiktoken \ No newline at end of file diff --git a/ai-hub/tests/core/pipelines/test_dspy_rag.py b/ai-hub/tests/core/pipelines/test_dspy_rag.py deleted file mode 100644 index 4fc3028..0000000 --- a/ai-hub/tests/core/pipelines/test_dspy_rag.py +++ /dev/null @@ -1,119 +0,0 @@ -import pytest -from typing import List -import asyncio -from unittest.mock import MagicMock, AsyncMock -from sqlalchemy.orm import Session -import dspy # <-- Import dspy - -# Import the pipeline and its new signature -from app.core.pipelines.dspy_rag import DspyRagPipeline, AnswerWithHistory -from app.db import models # Import your SQLAlchemy models for mocking history -from app.core.retrievers.base_retriever import Retriever - - -# --- Mock Classes --- - -class MockRetriever(Retriever): - """A mock retriever that returns a predefined list of strings.""" - def __init__(self, name: str, mock_data: List[str]): - self.name = name - self.mock_data = mock_data - - def retrieve_context(self, question: str, db: Session) -> List[str]: - return self.mock_data - -# --- Fixtures --- - -@pytest.fixture -def mock_db(): - """A mock SQLAlchemy session.""" - return MagicMock() - -@pytest.fixture(autouse=True) -def mock_dspy_predict_instance(mocker): - """ - Mocks the dspy.Predict class itself to return a mock instance - with a controllable aforward method. - """ - # Create a mock instance of dspy.Predict - mock_predict_instance = MagicMock() - mock_predict_instance.aforward = AsyncMock(return_value=MagicMock(answer="Mocked LLM answer.")) - - # Patch the dspy.Predict class to return our mock instance - mocker.patch('dspy.Predict', return_value=mock_predict_instance) - - return mock_predict_instance - - -# --- Test Cases --- - -@pytest.mark.asyncio -async def test_forward_pass_with_defaults(mock_db, mock_dspy_predict_instance): - """Test a successful forward pass using default processors.""" - pipeline = DspyRagPipeline() - - question = "What is the capital of France?" - history = [models.Message(sender="user", content="Hello there."), models.Message(sender="assistant", content="Hi.")] - context_chunks = ["Context 1.", "Context 2."] - - response = await pipeline.forward(question=question, history=history, context_chunks=context_chunks) - - expected_context = "Context 1.\n\nContext 2." - expected_history = "Human: Hello there.\nAssistant: Hi." - - mock_dspy_predict_instance.aforward.assert_called_once_with( - context=expected_context, - chat_history=expected_history, - question=question - ) - assert response == "Mocked LLM answer." - -@pytest.mark.asyncio -async def test_forward_with_custom_processors(mock_db, mock_dspy_predict_instance): - """Test that custom processors are used correctly.""" - - def custom_context_processor(contexts: List[str]) -> str: - return "CUSTOM_CONTEXT: " + " | ".join(contexts) - - def custom_history_formatter(history: List[models.Message]) -> str: - return " " + " ".join([m.content for m in history]) + " " - - def custom_response_processor(response: str) -> str: - return f"FINAL: {response.upper()}" - - pipeline = DspyRagPipeline( - context_postprocessor=custom_context_processor, - history_formatter=custom_history_formatter, - response_postprocessor=custom_response_processor - ) - - question = "Custom question?" - history = [models.Message(sender="user", content="User message.")] - context_chunks = ["Context A", "Context B"] - - response = await pipeline.forward(question=question, history=history, context_chunks=context_chunks) - - mock_dspy_predict_instance.aforward.assert_called_once_with( - context="CUSTOM_CONTEXT: Context A | Context B", - chat_history=" User message. ", - question="Custom question?" - ) - assert response == "FINAL: MOCKED LLM ANSWER." - -@pytest.mark.asyncio -async def test_empty_context_and_history_handling(mock_db, mock_dspy_predict_instance): - """Test behavior with empty context and chat history.""" - pipeline = DspyRagPipeline() - - question = "No context question." - history = [] - context_chunks = [] - - response = await pipeline.forward(question=question, history=history, context_chunks=context_chunks) - - mock_dspy_predict_instance.aforward.assert_called_once_with( - context="No context provided.", - chat_history="", - question=question - ) - assert response == "Mocked LLM answer." \ No newline at end of file diff --git a/ai-hub/tests/core/pipelines/test_rag_pipeline.py b/ai-hub/tests/core/pipelines/test_rag_pipeline.py new file mode 100644 index 0000000..624aefb --- /dev/null +++ b/ai-hub/tests/core/pipelines/test_rag_pipeline.py @@ -0,0 +1,116 @@ +import pytest +from typing import List +import asyncio +from unittest.mock import MagicMock, AsyncMock +from sqlalchemy.orm import Session + +# Import the pipeline +from app.core.pipelines.rag_pipeline import RagPipeline +from app.db import models # Import your SQLAlchemy models for mocking history +from app.core.retrievers.base_retriever import Retriever + + +# --- Mock Classes --- + +class MockRetriever(Retriever): + """A mock retriever that returns a predefined list of strings.""" + def __init__(self, name: str, mock_data: List[str]): + self.name = name + self.mock_data = mock_data + + def retrieve_context(self, question: str, db: Session) -> List[str]: + return self.mock_data + +# --- Fixtures --- + +@pytest.fixture +def mock_db(): + """A mock SQLAlchemy session.""" + return MagicMock() + +@pytest.fixture +def mock_llm_provider(): + """ + Mocks the llm_provider that the pipeline now uses. + """ + mock_provider = MagicMock() + mock_completion = MagicMock() + mock_completion.choices = [MagicMock()] + mock_completion.choices[0].message.content = "Mocked LLM answer." + mock_provider.acompletion = AsyncMock(return_value=mock_completion) + return mock_provider + + +# --- Test Cases --- + +@pytest.mark.asyncio +async def test_forward_pass_with_defaults(mock_db, mock_llm_provider): + """Test a successful forward pass using default processors.""" + pipeline = RagPipeline() + + question = "What is the capital of France?" + history = [models.Message(sender="user", content="Hello there."), models.Message(sender="assistant", content="Hi.")] + context_chunks = ["Context 1.", "Context 2."] + + response = await pipeline.forward( + question=question, + history=history, + context_chunks=context_chunks, + llm_provider=mock_llm_provider + ) + + # Assert that the completion method was called + mock_llm_provider.acompletion.assert_called_once() + assert response == "Mocked LLM answer." + +@pytest.mark.asyncio +async def test_forward_with_custom_processors(mock_db, mock_llm_provider): + """Test that custom processors are used correctly.""" + + def custom_context_processor(contexts: List[str]) -> str: + return "CUSTOM_CONTEXT: " + " | ".join(contexts) + + def custom_history_formatter(history: List[models.Message]) -> str: + return " " + " ".join([m.content for m in history]) + " " + + def custom_response_processor(response: str) -> str: + return f"FINAL: {response.upper()}" + + pipeline = RagPipeline( + context_postprocessor=custom_context_processor, + history_formatter=custom_history_formatter, + response_postprocessor=custom_response_processor + ) + + question = "Custom question?" + history = [models.Message(sender="user", content="User message.")] + context_chunks = ["Context A", "Context B"] + + response = await pipeline.forward( + question=question, + history=history, + context_chunks=context_chunks, + llm_provider=mock_llm_provider + ) + + mock_llm_provider.acompletion.assert_called_once() + assert response == "FINAL: MOCKED LLM ANSWER." + +@pytest.mark.asyncio +async def test_empty_context_and_history_handling(mock_db, mock_llm_provider): + """Test behavior with empty context and chat history.""" + pipeline = RagPipeline() + + question = "No context question." + history = [] + context_chunks = [] + + response = await pipeline.forward( + question=question, + history=history, + context_chunks=context_chunks, + llm_provider=mock_llm_provider + ) + + mock_llm_provider.acompletion.assert_called_once() + assert response == "Mocked LLM answer." \ No newline at end of file diff --git a/ai-hub/tests/core/providers/test_factory.py b/ai-hub/tests/core/providers/test_factory.py index 6caf8d8..ea849ca 100644 --- a/ai-hub/tests/core/providers/test_factory.py +++ b/ai-hub/tests/core/providers/test_factory.py @@ -17,8 +17,8 @@ assert isinstance(provider, GeneralProvider) def test_get_llm_provider_raises_error_for_unsupported_provider(): - """Tests that the factory raises an error for an unsupported provider name.""" - with pytest.raises(ValueError, match="Unsupported model provider: 'unknown'"): + """Tests that the factory raises an error for an unknown provider with no model name.""" + with pytest.raises(ValueError, match="No model name provided for 'unknown'"): get_llm_provider("unknown") @@ -37,16 +37,17 @@ assert provider.api_key == "dummy_key" assert provider.voice_name == valid_voice -def test_get_tts_provider_raises_error_for_unsupported_provider(): - """Tests that the factory raises an error for an unsupported TTS provider name.""" +def test_get_tts_provider_falls_back_to_general(): + """Tests that the factory falls back to GeneralTTSProvider for unknown names.""" + from app.core.providers.tts.general import GeneralTTSProvider valid_voice = GeminiTTSProvider.AVAILABLE_VOICES[0] - with pytest.raises(ValueError, match="Unsupported TTS provider: 'unknown'"): - get_tts_provider( - "unknown", - api_key="dummy_key", - model_name="dummy-model", - voice_name=valid_voice - ) + provider = get_tts_provider( + "unknown", + api_key="dummy_key", + model_name="dummy-model", + voice_name=valid_voice + ) + assert isinstance(provider, GeneralTTSProvider) # --- NEW Tests for STT Provider --- @@ -62,11 +63,12 @@ assert provider.api_key == "dummy_key" assert provider.model_name == "dummy-model" -def test_get_stt_provider_raises_error_for_unsupported_provider(): - """Tests that the factory raises an error for an unsupported STT provider name.""" - with pytest.raises(ValueError, match="Unsupported STT provider: 'unknown'"): - get_stt_provider( - "unknown", - api_key="dummy_key", - model_name="dummy-model" - ) +def test_get_stt_provider_falls_back_to_general(): + """Tests that the factory falls back to GeneralSTTProvider for unknown names.""" + from app.core.providers.stt.general import GeneralSTTProvider + provider = get_stt_provider( + "unknown", + api_key="dummy_key", + model_name="dummy-model" + ) + assert isinstance(provider, GeneralSTTProvider) diff --git a/ai-hub/tests/core/services/test_rag.py b/ai-hub/tests/core/services/test_rag.py index 3ed53a3..2f5ec3a 100644 --- a/ai-hub/tests/core/services/test_rag.py +++ b/ai-hub/tests/core/services/test_rag.py @@ -5,7 +5,6 @@ from sqlalchemy.exc import SQLAlchemyError from typing import List from datetime import datetime -import dspy from app.core.services.rag import RAGService from app.db import models @@ -13,7 +12,7 @@ from app.core.vector_store.embedder.mock import MockEmbedder from app.core.retrievers.faiss_db_retriever import FaissDBRetriever, Retriever from app.core.retrievers.base_retriever import Retriever -from app.core.pipelines.dspy_rag import DspyRagPipeline +from app.core.pipelines.rag_pipeline import RagPipeline from app.core.providers.base import LLMProvider @pytest.fixture @@ -33,24 +32,9 @@ retrievers=[mock_web_retriever, mock_faiss_retriever] ) -# --- Session Management Tests --- - -# def test_create_session(rag_service: RAGService): -# """Tests that the create_session method correctly creates a new session.""" -# mock_db = MagicMock(spec=Session) - -# # rag_service.create_session(db=mock_db, user_id="test_user", provider_name="gemini") - -# mock_db.add.assert_called_once() -# added_object = mock_db.add.call_args[0][0] -# assert isinstance(added_object, models.Session) -# assert added_object.user_id == "test_user" -# assert added_object.provider_name == "gemini" - @patch('app.core.services.rag.get_llm_provider') -@patch('app.core.services.rag.DspyRagPipeline') -@patch('dspy.configure') -def test_chat_with_rag_success(mock_configure, mock_dspy_pipeline, mock_get_llm_provider, rag_service: RAGService): +@patch('app.core.services.rag.RagPipeline') +def test_chat_with_rag_success(mock_rag_pipeline, mock_get_llm_provider, rag_service: RAGService): """ Tests the full orchestration of a chat message within a session using the default model and with the retriever loading parameter explicitly set to False. @@ -62,12 +46,12 @@ mock_llm_provider = MagicMock(spec=LLMProvider) mock_get_llm_provider.return_value = mock_llm_provider - mock_pipeline_instance = MagicMock(spec=DspyRagPipeline) + mock_pipeline_instance = MagicMock(spec=RagPipeline) mock_pipeline_instance.forward = AsyncMock(return_value="Final RAG response") - mock_dspy_pipeline.return_value = mock_pipeline_instance + mock_rag_pipeline.return_value = mock_pipeline_instance # --- Act --- - answer, provider_name = asyncio.run( + answer, provider_name, _id = asyncio.run( rag_service.chat_with_rag( db=mock_db, session_id=42, @@ -80,16 +64,17 @@ # --- Assert --- mock_db.query.assert_called_once_with(models.Session) assert mock_db.add.call_count == 2 - mock_get_llm_provider.assert_called_once_with("deepseek") + mock_get_llm_provider.assert_called_once_with("deepseek", model_name="", api_key_override=None) - # Assert that DspyRagPipeline was called without any arguments - mock_dspy_pipeline.assert_called_once_with() + # Assert that RagPipeline was called without any arguments + mock_rag_pipeline.assert_called_once_with() # Assert that the forward method received the correct arguments mock_pipeline_instance.forward.assert_called_once_with( question="Test prompt", history=mock_session.messages, - context_chunks=[] # It was called with an empty list + context_chunks=[], + llm_provider=mock_llm_provider ) assert answer == "Final RAG response" @@ -106,17 +91,16 @@ mock_db.query.return_value.options.return_value.filter.return_value.first.return_value = mock_session with patch('app.core.services.rag.get_llm_provider') as mock_get_llm_provider, \ - patch('app.core.services.rag.DspyRagPipeline') as mock_dspy_pipeline, \ - patch('dspy.configure'): + patch('app.core.services.rag.RagPipeline') as mock_rag_pipeline: mock_llm_provider = MagicMock(spec=LLMProvider) mock_get_llm_provider.return_value = mock_llm_provider - mock_pipeline_instance = MagicMock(spec=DspyRagPipeline) + mock_pipeline_instance = MagicMock(spec=RagPipeline) mock_pipeline_instance.forward = AsyncMock(return_value="Final RAG response from Gemini") - mock_dspy_pipeline.return_value = mock_pipeline_instance + mock_rag_pipeline.return_value = mock_pipeline_instance # --- Act --- - answer, provider_name = asyncio.run( + answer, provider_name, _id = asyncio.run( rag_service.chat_with_rag( db=mock_db, session_id=43, @@ -129,16 +113,17 @@ # --- Assert --- mock_db.query.assert_called_once_with(models.Session) assert mock_db.add.call_count == 2 - mock_get_llm_provider.assert_called_once_with("gemini") + mock_get_llm_provider.assert_called_once_with("gemini", model_name="", api_key_override=None) - # Assert that DspyRagPipeline was called without any arguments - mock_dspy_pipeline.assert_called_once_with() + # Assert that RagPipeline was called without any arguments + mock_rag_pipeline.assert_called_once_with() # Assert that the forward method received the correct arguments mock_pipeline_instance.forward.assert_called_once_with( question="Test prompt for Gemini", history=mock_session.messages, - context_chunks=[] # It was called with an empty list + context_chunks=[], + llm_provider=mock_llm_provider ) assert answer == "Final RAG response from Gemini" @@ -147,7 +132,7 @@ def test_chat_with_rag_with_faiss_retriever(rag_service: RAGService): """ - Tests that the chat_with_rag method correctly initializes the DspyRagPipeline + Tests that the chat_with_rag method correctly initializes the RagPipeline with the FaissDBRetriever when `load_faiss_retriever` is True. """ # --- Arrange --- @@ -159,17 +144,16 @@ rag_service.faiss_retriever.retrieve_context.return_value = ["faiss_chunk_1", "faiss_chunk_2"] with patch('app.core.services.rag.get_llm_provider') as mock_get_llm_provider, \ - patch('app.core.services.rag.DspyRagPipeline') as mock_dspy_pipeline, \ - patch('dspy.configure'): + patch('app.core.services.rag.RagPipeline') as mock_rag_pipeline: mock_llm_provider = MagicMock(spec=LLMProvider) mock_get_llm_provider.return_value = mock_llm_provider - mock_pipeline_instance = MagicMock(spec=DspyRagPipeline) + mock_pipeline_instance = MagicMock(spec=RagPipeline) mock_pipeline_instance.forward = AsyncMock(return_value="Response with FAISS context") - mock_dspy_pipeline.return_value = mock_pipeline_instance + mock_rag_pipeline.return_value = mock_pipeline_instance # --- Act --- - answer, provider_name = asyncio.run( + answer, provider_name, _id = asyncio.run( rag_service.chat_with_rag( db=mock_db, session_id=44, @@ -180,8 +164,8 @@ ) # --- Assert --- - # The DspyRagPipeline is still called without arguments - mock_dspy_pipeline.assert_called_once_with() + # The RagPipeline is still called without arguments + mock_rag_pipeline.assert_called_once_with() # The retriever's context method is now called rag_service.faiss_retriever.retrieve_context.assert_called_once_with(query="Test prompt with FAISS", db=mock_db) @@ -190,7 +174,8 @@ mock_pipeline_instance.forward.assert_called_once_with( question="Test prompt with FAISS", history=mock_session.messages, - context_chunks=["faiss_chunk_1", "faiss_chunk_2"] + context_chunks=["faiss_chunk_1", "faiss_chunk_2"], + llm_provider=mock_llm_provider ) assert answer == "Response with FAISS context" diff --git a/ai-hub/tests/core/vector_store/test_embedder_factory.py b/ai-hub/tests/core/vector_store/test_embedder_factory.py index e7c3875..332a837 100644 --- a/ai-hub/tests/core/vector_store/test_embedder_factory.py +++ b/ai-hub/tests/core/vector_store/test_embedder_factory.py @@ -1,15 +1,14 @@ import pytest from app.core.vector_store.embedder.factory import get_embedder_from_config -from app.config import EmbeddingProvider def test_get_mock_embedder(): - embedder = get_embedder_from_config(EmbeddingProvider.MOCK, 768, None, None) + embedder = get_embedder_from_config("mock", 768, None, None) assert embedder is not None assert embedder.dimension == 768 def test_get_genai_embedder(): embedder = get_embedder_from_config( - EmbeddingProvider.GOOGLE_GEMINI, 768, "gemini-embedding-001", "fake_key" + "google_gemini", 768, "gemini-embedding-001", "fake_key" ) assert embedder.model_name == "gemini-embedding-001" assert embedder.api_key == "fake_key" diff --git a/ai-hub/tests/test_app.py b/ai-hub/tests/test_app.py index 6dd2b4a..801860f 100644 --- a/ai-hub/tests/test_app.py +++ b/ai-hub/tests/test_app.py @@ -75,6 +75,7 @@ user_id="test_user", provider_name="gemini", title="New Chat Session", + feature_name="default", created_at=datetime.now() ) mock_services.session_service.create_session.return_value = mock_session_obj @@ -92,7 +93,7 @@ assert response_data["id"] == 1 assert response_data["user_id"] == "test_user" mock_services.session_service.create_session.assert_called_once_with( - db=mock_db, user_id="test_user", provider_name="gemini" + db=mock_db, user_id="test_user", provider_name="gemini", feature_name="default", stt_provider_name=None, tts_provider_name=None ) @patch('app.app.ServiceContainer') @@ -113,7 +114,7 @@ mock_service_container.return_value = mock_services # Correctly mock the async method using AsyncMock - mock_chat_with_rag = AsyncMock(return_value=("This is a mock response.", "deepseek")) + mock_chat_with_rag = AsyncMock(return_value=("This is a mock response.", "gemini", 42)) mock_services.rag_service.chat_with_rag = mock_chat_with_rag app = create_app() @@ -126,9 +127,9 @@ # Assert assert response.status_code == 200 assert response.json()["answer"] == "This is a mock response." - assert response.json()["provider_used"] == "deepseek" + assert response.json()["provider_used"] == "gemini" mock_services.rag_service.chat_with_rag.assert_called_once_with( - db=mock_db, session_id=123, prompt="Hello there", provider_name="deepseek", load_faiss_retriever=False + db=mock_db, session_id=123, prompt="Hello there", provider_name="gemini", load_faiss_retriever=False, user_service=mock_services.user_service ) @patch('app.app.ServiceContainer') @@ -148,7 +149,7 @@ mock_service_container.return_value = mock_services # Correctly mock the async method using AsyncMock - mock_chat_with_rag = AsyncMock(return_value=("Mocked response from Gemini", "gemini")) + mock_chat_with_rag = AsyncMock(return_value=("Mocked response from Gemini", "gemini", 123)) mock_services.rag_service.chat_with_rag = mock_chat_with_rag app = create_app() @@ -159,13 +160,14 @@ # Assert assert response.status_code == 200 - assert response.json() == {"answer": "Mocked response from Gemini", "provider_used": "gemini"} + assert response.json() == {"answer": "Mocked response from Gemini", "provider_used": "gemini", "message_id": 123} mock_services.rag_service.chat_with_rag.assert_called_once_with( db=mock_db, session_id=42, prompt="Hello there, Gemini!", provider_name="gemini", - load_faiss_retriever=False + load_faiss_retriever=False, + user_service=mock_services.user_service ) @patch('app.app.ServiceContainer') diff --git a/ai-hub/tests/test_config.py b/ai-hub/tests/test_config.py index b7c9647..2cc2a2a 100644 --- a/ai-hub/tests/test_config.py +++ b/ai-hub/tests/test_config.py @@ -1,7 +1,7 @@ import pytest import importlib import yaml -from app.config import EmbeddingProvider, TTSProvider, STTProvider, Settings +from app.config import Settings @pytest.fixture @@ -161,7 +161,7 @@ monkeypatch.setenv("GEMINI_API_KEY", "mock_key") settings = Settings() - assert settings.TTS_PROVIDER == TTSProvider.GOOGLE_GEMINI + assert settings.TTS_PROVIDER == "google_gemini" assert settings.TTS_VOICE_NAME == "Laomedeia" assert settings.TTS_MODEL_NAME == "tts-model-from-yaml" assert settings.TTS_API_KEY == "tts-api-from-yaml" @@ -178,7 +178,7 @@ monkeypatch.setenv("GEMINI_API_KEY", "mock_key") settings = Settings() - assert settings.TTS_PROVIDER == TTSProvider.GOOGLE_GEMINI + assert settings.TTS_PROVIDER == "google_gemini" assert settings.TTS_VOICE_NAME == "Zephyr" assert settings.TTS_MODEL_NAME == "env-tts-model" assert settings.TTS_API_KEY == "env_tts_key" @@ -192,7 +192,7 @@ settings = Settings() - assert settings.TTS_PROVIDER == TTSProvider.GOOGLE_GEMINI + assert settings.TTS_PROVIDER == "google_gemini" assert settings.TTS_VOICE_NAME == "Kore" assert settings.TTS_MODEL_NAME == "gemini-2.5-flash-preview-tts" assert settings.TTS_API_KEY == "fallback_gemini_key" @@ -206,7 +206,7 @@ monkeypatch.setenv("GEMINI_API_KEY", "mock_gemini_key") settings = Settings() - assert settings.STT_PROVIDER == STTProvider.OPENAI + assert settings.STT_PROVIDER == "openai" assert settings.STT_MODEL_NAME == "stt-model-from-yaml" assert settings.STT_API_KEY == "stt-api-from-yaml" @@ -221,7 +221,7 @@ monkeypatch.setenv("GEMINI_API_KEY", "mock_gemini_key") settings = Settings() - assert settings.STT_PROVIDER == STTProvider.OPENAI + assert settings.STT_PROVIDER == "openai" assert settings.STT_MODEL_NAME == "env-stt-model" assert settings.STT_API_KEY == "env_stt_key" @@ -234,7 +234,7 @@ settings = Settings() - assert settings.STT_PROVIDER == STTProvider.OPENAI + assert settings.STT_PROVIDER == "openai" assert settings.STT_API_KEY == "fallback_openai_key" @@ -246,5 +246,5 @@ settings = Settings() - assert settings.STT_PROVIDER == STTProvider.GOOGLE_GEMINI + assert settings.STT_PROVIDER == "google_gemini" assert settings.STT_API_KEY == "fallback_gemini_key" \ No newline at end of file diff --git a/deploy_remote.sh b/deploy_remote.sh index e7ff0a7..2e3411d 100755 --- a/deploy_remote.sh +++ b/deploy_remote.sh @@ -19,19 +19,24 @@ if [ -n "$GITBUCKET_TOKEN" ] && [ -n "$SNIPPET_ID" ]; then echo "Secrets not provided in environment. Attempting to fetch from GitBucket..." - TMP_SECRETS=$(mktemp -d) - if git clone "https://yangyangxie:${GITBUCKET_TOKEN}@gitbucket.jerxie.com/git/gist/yangyangxie/${SNIPPET_ID}.git" "$TMP_SECRETS" &> /dev/null; then - if [ -f "$TMP_SECRETS/.env.production" ]; then - source "$TMP_SECRETS/.env.production" - HOST="${REMOTE_HOST:-$HOST}" - USER="${REMOTE_USER:-$USER}" - PASS="${REMOTE_PASSWORD:-$PASS}" - echo "Successfully loaded credentials from GitBucket." + TMP_SECRETS=$(mktemp -d) + if git clone "https://yangyangxie:${GITBUCKET_TOKEN}@gitbucket.jerxie.com/git/gist/yangyangxie/${SNIPPET_ID}.git" "$TMP_SECRETS" &> /dev/null; then + if [ -f "$TMP_SECRETS/.env.production" ]; then + source "$TMP_SECRETS/.env.production" + HOST="${REMOTE_HOST:-$HOST}" + USER="${REMOTE_USER:-$USER}" + PASS="${REMOTE_PASSWORD:-$PASS}" + echo "Successfully loaded credentials from GitBucket." + # Strip potential carriage returns + HOST=$(echo "$HOST" | tr -d '\r') + USER=$(echo "$USER" | tr -d '\r') + PASS=$(echo "$PASS" | tr -d '\r') + fi + else + echo "Failed to fetch secrets from GitBucket." fi - else - echo "Failed to fetch secrets from GitBucket." + rm -rf "$TMP_SECRETS" fi - rm -rf "$TMP_SECRETS" fi # Fallback defaults if still not set @@ -55,7 +60,7 @@ fi # 1. Sync local codebase to temporary directory on remote server -echo "Syncing local files to production..." +echo "Syncing local files to production [USER: $USER, HOST: $HOST]..." sshpass -p "$PASS" rsync -avz \ --exclude '.git' \ --exclude 'node_modules' \