diff --git a/.gitignore b/.gitignore index a2ad79e..af87667 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,6 @@ **.db ai-hub/data/* ai-hub/ai_payloads/* -ai-hub/.env.prod \ No newline at end of file +ai-hub/.env.prod +@eaDir/ +**/.DS_Store \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index 011873d..41194e6 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,15 +5,23 @@ "version": "0.2.0", "configurations": [ { - "name": "Python Debugger: Current File", + "name": "Python: Remote Attach", "type": "debugpy", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal" + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/ai-hub", + "remoteRoot": "/ai-hub" + } + ] }, { "name": "Python: Uvicorn", - "type": "python", + "type":"debugpy", "request": "launch", "module": "uvicorn", "args": [ diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index 597532d..f48ae13 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -57,6 +57,8 @@ ) logging.basicConfig(level=settings.LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(message)s') + logging.getLogger("dspy").setLevel(logging.DEBUG) + # --- Initialize Core Services using settings --- # 1. Use the new, more flexible factory function to create the embedder instance diff --git a/ai-hub/app/core/pipelines/code_changer.py b/ai-hub/app/core/pipelines/code_changer.py new file mode 100644 index 0000000..6c48779 --- /dev/null +++ b/ai-hub/app/core/pipelines/code_changer.py @@ -0,0 +1,141 @@ +import dspy +import json +import os +from typing import List, Dict, Any, Tuple, Optional, Callable + + +class CodeChanger(dspy.Signature): + """ + ### 🧠 Core Directives + + You are a code generation assistant specialized in producing **one precise and complete code change** per instruction. 
Your output must be a strict JSON object containing: + + - `reasoning`: A concise explanation of the change. + - `content`: The **full content of the file** (or an empty string for deletions). + + No extra output is allowed—**only the JSON object.** + + --- + + ### 1. Input Structure + + You will be provided: + + - `instruction`: A plain-text directive specifying the desired code change. + - `original_files`: A list of unmodified files from the codebase: + ```json + [ + { + "file_path": "/app/main.py", + "content": "# main.py\\n\\ndef new_function():\\n pass\\n\\nnew_function()\\n" + }, + ... + ] + ```` + + - `updated_files`: Files already modified in prior steps: + + + + ```json + [ + { + "file_path": "/app/main.py", + "reasoning": "...", + "content": "..." + }, + ... + ] + ``` + + ----- + + ### 2\. Code Generation Rules + + Your task is to output **one full-file code change** for the specified `file_path`, using the instruction and file context. + + + ### **Code Quality Requirements** + + * **No placeholders or pseudocode.** All code must be complete, functional, and ready to execute. + * **Provide the full code.** When making changes, always output the entire, modified file. Do not use abbreviations, placeholders, or `TODO` comments. + * **Ensure code integrity.** The updated code, when combined with the existing project files, must form a seamless, complete, and fully executable codebase. It should be both readable and extensible. + * **Include all dependencies and imports.** The code must be self-contained and immediately executable without requiring the user to add missing imports. + * **All definitions must be resolvable.** Ensure that all functions, variables, and return values are defined within the scope of the provided file. + * **Modular and well-structured.** The code should be modular, logically organized, and include clear, concise comments where necessary. 
+ * **Follow best practices.** Adhere to good naming conventions and ensure a logical flow throughout the code. + + #### 🔹 Change Types + + * **File Modification**: + - Provide the entire updated file in `content`. + * **File Creation**: + - Include full file content in `content`. + * **File Deletion**: + - Set `content` to `""` and explain the deletion in `reasoning`. + + ----- + + ### 3\. Output Format + + Return exactly one JSON object: + + ```json + { + "reasoning": "Brief explanation of the change.", + "content": "Full file content here" + } + ``` + + **Do not output any explanation, headers, or text outside this JSON.** + """ + + overall_plan = dspy.InputField(desc="The high-level strategy for the code changes.") + instruction = dspy.InputField(desc="The specific instruction for this step of the code change.") + filepath = dspy.InputField(desc="The path of the file to be changed, created, or deleted.") + + original_files = dspy.InputField( + desc="A JSON list of dictionaries with 'file_path' and 'content' for the original files." + ) + + updated_files = dspy.InputField( + desc="A JSON list of dictionaries with 'file_path' and 'content' for files modified by previous steps." + ) + + reasoning = dspy.OutputField(desc="A detailed reasoning process for the code change.") + content = dspy.OutputField(desc="The generated code.") + + +class CodeRagCodeChanger(dspy.Module): + """ + A single-step module to generate code changes based on user instructions and relevant files. 
+ """ + + def __init__(self): + super().__init__() + self.code_changer = dspy.ChainOfThought(CodeChanger) + + async def forward( + self, + overall_plan: str, + instruction: str, + filepath: str, + original_files: List[Dict[str, Any]], + updated_files: List[Dict[str, Any]] + ) -> Tuple[str, str]: + + # Convert dictionaries to JSON strings for the model + original_json = json.dumps(original_files) + updated_json = json.dumps(updated_files) + + # Generate prediction + prediction = await self.code_changer.acall( + overall_plan=overall_plan, + instruction=instruction, + filepath=filepath, + original_files=original_json, + updated_files=updated_json + ) + + # Return code diff and reasoning + return prediction.content, prediction.reasoning \ No newline at end of file diff --git a/ai-hub/app/core/pipelines/file_selector.py b/ai-hub/app/core/pipelines/file_selector.py index 26d8e21..47ce4ae 100644 --- a/ai-hub/app/core/pipelines/file_selector.py +++ b/ai-hub/app/core/pipelines/file_selector.py @@ -1,7 +1,7 @@ import dspy import json from app.db import models -from typing import List, Dict, Any +from typing import List, Dict, Any,Tuple class SelectFiles(dspy.Signature): """ @@ -43,7 +43,7 @@ for msg in history ) - async def forward(self, question: str, retrieved_data: List[str], history: List[models.Message]) -> List[str]: + async def forward(self, question: str, retrieved_data: List[str], history: List[models.Message]) -> Tuple[List[str], str]: # Convert the list of strings to a JSON string using json.dumps # The prompt is now explicitly asking for a JSON array of strings, so you can pass the raw JSON string. retrieved_json = json.dumps(retrieved_data) @@ -55,4 +55,4 @@ ) # The prediction.answer should be the list of strings directly as per the output format. 
- return prediction.answer \ No newline at end of file + return prediction.answer, prediction.reasoning \ No newline at end of file diff --git a/ai-hub/app/core/pipelines/question_decider.py b/ai-hub/app/core/pipelines/question_decider.py index 1f9e715..7ec258e 100644 --- a/ai-hub/app/core/pipelines/question_decider.py +++ b/ai-hub/app/core/pipelines/question_decider.py @@ -2,81 +2,209 @@ import json import os from app.db import models -from typing import List, Dict, Any, Tuple, Optional -from typing import List, Callable, Optional +from typing import List, Dict, Any, Tuple, Optional, Callable class QuestionDecider(dspy.Signature): """ - You are a highly specialized AI assistant for software engineering tasks. Your role is to analyze a user's request and the provided codebase to decide on the best course of action: provide an answer, suggest a code change, or ask for more files. Your decisions must be based **strictly and exclusively** on the provided content. +### 🧠 **Core Directives** - --- +You are a specialized AI assistant for software engineering tasks. Your responses—providing an answer, suggesting a code change, or requesting more files—must be based **exclusively** on the provided codebase content. Your primary goal is to be helpful and accurate while adhering strictly to the following directives. - ### 🧠 Core Directives: +----- - 1. **Analyze the Request and Available Data:** - * Examine the `question` and `chat_history` to understand the user's intent. - * You are provided with two distinct lists of files: `retrieved_paths_with_content` (files you have and can use) and `retrieved_paths_without_content` (files you know exist but need to request their content). - * **Crucial Rule:** The `retrieved_paths_with_content` is your complete and only source of usable code information. Do not mention or refer to any code that is not explicitly present in this data. +## 1\. Data Analysis and Availability - 2. 
**Determine File Requirements:** - * Identify any specific file paths mentioned by the user or required to fulfill the request. - * **Do not re-request files that you already have, "have" means your requested file path is already existed in `retrieved_paths_with_content`** - * A file is considered "missing" only if its path is not in `retrieved_paths_with_content` and is either mentioned in the request or is required for a code-change. This is the only valid reason to choose `decision='files'`. The `retrieved_paths_without_content` list helps you identify what files are candidates to request. - * **Crucial New Rule:** If a file path mentioned by the user is **not found** in either `retrieved_paths_with_content` or `retrieved_paths_without_content`, you must choose the 'answer' decision and explain that the file could not be found. Do not request it. +This section outlines the process for analyzing user requests and accessing file content to provide a complete and accurate response. Your ability to answer a user's question depends entirely on the data you can access. - 3. **Choose the Correct Decision Path:** - * **Decision: 'answer'** - * Choose this if you have all the necessary information in `retrieved_paths_with_content` to provide a full, complete, and comprehensive explanation for a non-code-modification question. - * Also choose this if the user asks about a file that is not present in any of the provided data. You must explain to the user why the file could not be found. - * The `answer` field must contain a detailed, well-structured explanation in Markdown. - * The `code_diff` field must be empty. +* **Analyze the User's Request:** Carefully examine the **`question`** and **`chat_history`** to understand what the user wants. This is the most crucial step, as it guides which files you need to retrieve. 
- * **Decision: 'code_change'** - * Choose this if the user's request involves modifying or adding to the code (e.g., "fix this bug," "implement this feature," "refactor this function", "show me full code"). - * This decision is also for requests to **generate new code** (e.g., creating a new file from scratch). If the user asks for the "full code" of a file that doesn't exist, this is a code generation task. - * You must have all the relevant files with content in `retrieved_paths_with_content` to propose the change. - * The `answer` field can be an optional, high-level summary of the change. - * The `code_diff` field must contain the full and complete git diff showing the exact modifications, including adding new files. +* **Source of Information:** The only information you can use to generate a code-related answer comes from the files provided in the **`retrieved_paths_with_content`** list. You cannot use any information from the **`retrieved_paths_without_content`** list or from any other source. - * **Decision: 'files'** - * Choose this **only if** you need more files to fulfill the user's request. - * The `answer` field must be a valid JSON list of strings, with each string being an explicit, complete file path that is **found in the `retrieved_paths_without_content` list.** **Do not use wildcard characters like `*` or `?`.** - * The `code_diff` field must be empty. +* **File Data & Availability ** - 4. **Final Output Requirements:** - * Your output must be a valid JSON object matching the schema. - * **Crucial New Rule:** Do not mention internal system variables or the DSPy signature fields (`retrieved_paths_with_content`, `retrieved_paths_without_content`) in the `reasoning`, `answer`, or `code_diff` fields. The output should be user-friendly and not leak implementation details. - * Be helpful, precise, and adhere strictly to these rules. Do not hallucinate file paths or content. 
- --- +The information you receive is categorized into two mutually exclusive lists. This structure ensures you know exactly which files you can access and which are available to be requested. + +*** + +* **`retrieved_paths_with_content`**: A list of objects, each with a `file_path` and `content` attribute. These are files you have **already requested** and can now use to generate a response or a code change. You cannot request these files again. + + * **Example:** + ```json + [ + { + "file_path": "src/utils/helpers.js", + "content": "// Helper function to format a date...\nexport const formatDate = (date) => { /*...*/ };" + } + ] + ``` + +* **`retrieved_paths_without_content`**: A list of objects, each with a single `file_path` attribute. These files exist in the codebase but have **not yet been requested**. You cannot access their content. The paths in this list are your only valid options for a `files` decision. + + * **Example:** + ```json + [ + {"file_path": "src/components/Button.jsx"}, + {"file_path": "src/styles/theme.css"}, + {"file_path": "tests/unit/helpers.test.js"} + ] + ``` + +----- + +## 2\. Decision Logic + +You must choose one of three mutually exclusive decisions: `answer`, `code_change`, or `files`. + +### `decision='answer'` + + * **When to use:** Choose this if you have all the necessary information in `retrieved_paths_with_content` to provide a complete and comprehensive explanation for a non-code-modification question. + * **Example use cases:** + * "What does this function do?" + * "Explain the architecture of the `app` folder." + * "Why is this test failing?" + * "Which files are involved in the user authentication flow?" + * **Content:** The `answer` field must contain a detailed, well-structured explanation in Markdown. 
+ * **Special Case:** If a user requests a file that is **not** present in either `retrieved_paths_with_content` or `retrieved_paths_without_content`, you **must** choose `answer` and explain that the file could not be found. This rule applies when the user's intent is to modify an existing file. If they are asking you to create a new file, that falls under `code_change`. + +----- + +### `decision='code_change'` + #### **When to Use** + + Choose this decision when the user's request involves any manipulation of code. This includes: + + * **Modifying existing code**: Fixing a bug, refactoring a function, implementing a new feature within an existing file. + * **Creating new code**: Generating a new file from scratch (e.g., "create a new file for a utility function"). + * **Removing code**: Deleting a file or a block of code. + * **Displaying code**: Responding to requests like "show me the full code for..." or "update this file." + + \***Pre-conditions**: You must have all the relevant files with content in `retrieved_paths_with_content` to propose the change. + + ----- + + ### **High-Level Plan** + + The `answer` field must contain a **high-level strategy plan** for the proposed code changes. This plan should be broken down into a series of **specific, actionable instructions**. Each instruction must represent an independent, testable step. This ensures that the changes are modular and easy to follow. + + **Example Plan Breakdown:** + + * **User Request:** "Refactor the `create_app` function to improve readability and maintainability by breaking it into smaller helper functions." + * **Plan Breakdown:** + 1. **Extract initialization logic:** Create a new function `initialize_database` to handle all database setup. + 2. **Modularize middleware:** Implement a `setup_middlewares` function to handle all middleware configurations. + 3. **Group route registration:** Create a new function `register_routes` to modularize all route registrations. + 4. 
**Isolate error handling:** Implement a dedicated function `setup_error_handling` to set up error handling logic. + 5. **Update the main function:** Modify the `create_app` function to call these new helper functions in the correct sequence. + + ----- + + ### **Code Change Instructions Format** + + The response must be a **JSON list of objects**. No other text, fields, or conversational elements are allowed. + + ```json + [ + { + "file_path": "/app/main.py", + "action": "modify", + "change_instruction": "Refactor the create_app function to improve readability and maintainability by breaking it into smaller helper functions.", + "original_files": ["/app/core/services/tts.py", "/app/core/services/stt.py", "/app/main.py"], + "updated_files": ["/app/main.py"] + } + ... + ] + ```` + + ----- + + #### **Parameter Breakdown** + * **`file_path`** (string): The path for the file to be changed, created, or deleted. Must begin with a `/`. + * **New files**: Use a valid, non-existent path. + * **Deletions**: Use the path of the file to be removed. + * **`action`** (string): The operation on the file. Must be one of: `"create"`, `"delete"`, `"move"`, or `"modify"`. + * `"create"`: Creates a new file from scratch. + * `"delete"`: Deletes the entire file. + * `"move"`: This action renames or moves a file to a new path. It does not perform any code changes. The change_instruction for this action must explicitly state the new file path, which should be wrapped in backticks (``). + Example: "change_instruction": "Move the file to `/new/path/file.py`." + * `"modify"`: Makes partial code changes to an existing file, including inserting, deleting, or replacing lines of code. + * **`change_instruction`** (string): A clear and specific instruction for the code changer. + * **New files**: Briefly describe the file's purpose. + * **Deletions**: State the intent to delete the file. + * **`original_files`** (list of strings): Paths to pre-existing files needed for read-only context. 
This allows the AI to understand the change instruction based on the original files. This list should reference files from `retrieved_paths_with_content`. Use `[]` if no context is needed. Paths must begin with a `/`. + * **`updated_files`** (list of strings): Paths to files previously modified in the current session. This allows the AI to understand the changes made so far and handle incremental updates. Use this for referencing changes from earlier steps. Use `[]` if no previous changes are relevant. Paths must begin with a `/`. + ----- + + **Execution Note**: The order of objects in the list is crucial. Each step in the list has access to the changes made in all preceding steps. + +### `decision='files'` + When more files are needed to fulfill the user's request, use this decision to retrieve them. Request only the subset of files you actually need. + + The files you request **must** be present in the `retrieved_paths_without_content` list. **Do not** request files that are already in the `retrieved_paths_with_content` list. + + **Request a small, focused set of files** (typically 2-4). + **Analyze the fetched content** (which will appear in `retrieved_paths_with_content`) and make sure none of those files is requested again. + **Repeat** if necessary, requesting only files that are listed in `retrieved_paths_without_content`. + + When the `files` decision is chosen, your response must be a **JSON list of strings**. Each string should be a complete, explicit file path. + The response must be a pure JSON array containing only the file paths you want to retrieve. Do not include any nested objects, additional keys, or conversational text. + + * **Example:** + ```json + [ + "/app/core/services/tts.py", + "/app/core/services/stt.py", + "/app/main.py" + ] + ``` + + **Constraints & Selection Criteria:** + + * **Format**: The JSON must contain only file paths. Do not include any other text, wildcards, or conversational elements. 
+ * **Path Requirements**: Every file path must begin with a `/`. **Do not** include any paths not present in the `retrieved_paths_without_content` list. + * **Relevance**: Prioritize files that contain the core logic most relevant to the user's query. + * **Efficiency**: To avoid exceeding token limits, be highly selective. Request a small, focused set of **2-4 files**. + * **Exclusions**: **Do not** request non-text files (e.g., `.exe`, `.db`, `.zip`, `.jpg`). + * **Inference**: If the user's request or chat history references a specific file, use that as a strong hint to find the most similar path in the list. + * **Proactive Planning**: If the user's request implies a code change but file content is missing, proactively request the files you anticipate needing to successfully and correctly generate a code plan. This is your only opportunity to retrieve these files. + * **Redundancy**: **Do not** re-request files that are already available in `retrieved_paths_with_content`. + + **Other Tips:** + * Your decision-making process for **`code_change`** must include an evaluation of the user's request in the context of the codebase's size and complexity. + * If you've already requested multiple files and still find the information insufficient to fulfill the user's request, **narrow the scope of the question** based on the files you currently have. It’s okay if your response does not cover *all* necessary code changes—just make sure to explain this clearly in the reasoning. + * **Do NOT repeatedly or indefinitely request more files.** Be proactive in working with what is already available. + * If the request is for a **general code change** (e.g., refactoring the entire project) and the **codebase is small**, providing a `code_change` is a reasonable decision. + * If the request is too broad and the codebase is **large or complex** (as determined by `retrieved_paths_with_content`), you should **avoid** choosing `code_change`. 
Instead, guide the user to narrow the scope of their request before proceeding. + +----- + +## 3\. Final Output Requirements + + * **Strict Structure:** Your output must strictly adhere to the specified JSON format. + * **No Internal Leaks:** Do not mention internal system variables or the DSPy signature fields (`retrieved_paths_with_content`, `retrieved_paths_without_content`) in your reasoning or answer fields. The output should be user-friendly. + * **Precision:** Be helpful, precise, and adhere strictly to these rules. Do not hallucinate file paths or content. """ question = dspy.InputField(desc="The user's current question.") chat_history = dspy.InputField(desc="The ongoing dialogue between the user and the AI.") - # New Input Fields to make the data split explicit - retrieved_paths_with_content = dspy.InputField(desc="A JSON string of files that have been successfully retrieved with their full content.") - retrieved_paths_without_content = dspy.InputField(desc="A JSON string of files that have been found but their content has not yet been loaded (e.g., empty or null content).") + retrieved_paths_with_content = dspy.InputField(desc="A JSON list of file paths with their full content available.") + retrieved_paths_without_content = dspy.InputField(desc="A JSON list of file paths that exist but are not yet loaded.") reasoning = dspy.OutputField( - desc="First, determine if the artifacts are sufficient. Then, based on the question, the decision type should be either 'code_change' or 'answer'. Finally, analyze the question and determine the output fields." + desc="Step-by-step reasoning that explains the chosen `decision` and prepares the final output. This should include an analysis of the user's intent, the availability of required files, and the rationale behind the decision. If the decision involves using files, clearly state which files are already available, which additional files are needed, and why." 
) decision = dspy.OutputField( - desc="Must be one of: 'answer', 'files', or 'code_change'." + desc="The decision type for the response. Must be one of: 'answer', 'files', or 'code_change'." ) - code_diff = dspy.OutputField( - desc=( - "If `decision` is 'code_change': the full, complete git diff of the proposed changes.\n" - "Leave this field empty if the decision is not 'code_change'." - ) - ) + answer = dspy.OutputField( desc=( "If `decision` is 'answer': a comprehensive, well-structured explanation in Markdown.\n" - "If `decision` is 'files': a JSON-formatted list of required file paths.\n" - "If `decision` is 'code_change': an optional, high-level summary of the proposed changes. Leave empty if no summary is needed." + "If `decision` is 'files': a JSON-formatted list of file paths to retrieve.\n" + "If `decision` is 'code_change': a high-level strategy plan for the proposed code changes." ) ) - + class CodeRagQuestionDecider(dspy.Module): def __init__(self, log_dir: str = "ai_payloads", history_formatter: Optional[Callable[[List[models.Message]], str]] = None): @@ -108,7 +236,7 @@ retrieved_data: A dictionary mapping file paths to file contents. Returns: - A tuple of (answer, decision, code_diff). + A tuple of (answer, reasoning, decision). 
class CodeChangeHelper:
    """
    Execute an ordered plan of file-level code changes and collect the results.

    The plan is a JSON list of step objects. Every step must carry the keys
    ``file_path``, ``action``, ``change_instruction``, ``original_files`` and
    ``updated_files``. Steps run strictly in order; ``delete`` and ``move``
    are handled locally, every other action is delegated to the LLM-backed
    code changer.
    """

    def __init__(self, db: "Session", provider_name: str, input_data: str):
        """
        Parse the plan and preload the original files it references.

        Args:
            db: The database session used to look up file content.
            provider_name: The name of the LLM provider.
            input_data: A JSON string (optionally wrapped in a ```json fence)
                representing a list of code change steps.

        Raises:
            ValueError: If ``input_data`` is not valid JSON (``JSONDecodeError``
                is a ``ValueError`` subclass) or does not have the expected
                structure.
        """
        self.db = db
        self.input_data = input_data
        self.llm_provider = get_llm_provider(provider_name)
        self.code_changer = CodeRagCodeChanger()

        self.parsed_data: List[Dict[str, Any]] = []
        # file path -> {"content": ..., "reasoning": ...} produced by executed steps
        self.updated_files: Dict[str, Dict[str, str]] = {}
        # file path -> content as stored in the database before any step ran
        self.original_files: Dict[str, str] = {}
        self.last_step_index: int = -1
        try:
            self._parse_input_data()
            self._preload_original_files()
        except (json.JSONDecodeError, ValueError) as e:
            logger.error("Initialization failed due to invalid input: %s", e)
            raise

    def _parse_input_data(self) -> None:
        """
        Parse ``self.input_data`` into ``self.parsed_data`` and validate it.

        Raises:
            ValueError: If the payload is not a JSON array of objects, or an
                object lacks one of the required keys.
        """
        # The model frequently wraps its JSON in a Markdown code fence; strip it.
        cleaned_input = re.sub(r"^```json\s*|\s*```$", "", self.input_data.strip(), flags=re.DOTALL)

        parsed_data = json.loads(cleaned_input)
        if not isinstance(parsed_data, list):
            raise ValueError("Input is not a JSON array.")

        required_keys = ["file_path", "change_instruction", "original_files", "updated_files", "action"]
        for item in parsed_data:
            # Reject non-object items explicitly: `key in "some string"` is a
            # substring test and `.keys()` would raise AttributeError, which
            # callers expecting ValueError would not catch.
            if not isinstance(item, dict):
                raise ValueError(f"Each step must be a JSON object, got {type(item).__name__}.")
            if not all(key in item for key in required_keys):
                raise ValueError(f"An item is missing required keys. Found: {list(item.keys())}, Required: {required_keys}")

        self.parsed_data = parsed_data

    def _preload_original_files(self) -> None:
        """
        Fetch and cache the content of every file listed under ``original_files``.

        Files that cannot be fetched are simply left out of the cache.
        """
        unique_file_paths = {
            path
            for item in self.parsed_data
            # `or []` tolerates an explicit JSON null for the list.
            for path in (item.get("original_files") or [])
        }

        for file_path in unique_file_paths:
            content = self._fetch_file_content(file_path)
            if content is not None:
                self.original_files[file_path] = content

    def _fetch_file_content(self, file_path: str) -> Optional[str]:
        """
        Fetch the content of a file from the database.

        Returns None if the file is not found or an error occurs.
        """
        try:
            # NOTE(review): the lookup is by path only and is not scoped to a
            # retrieval request; if the same path was stored for several
            # requests the first row wins - confirm this is intended.
            retrieved_file = self.db.query(file_retriever_models.RetrievedFile).filter_by(file_path=file_path).first()
            if retrieved_file:
                return retrieved_file.content
            logger.warning(f"File not found in the database: {file_path}")
            return None
        except Exception as e:
            # Deliberately best-effort: a failed lookup must not abort the plan.
            logger.error("Error fetching file content for '%s': %s", file_path, e)
            return None

    def _resolve_current_content(self, file_path: str) -> str:
        """
        Return the latest known content of ``file_path``.

        Lookup order: output of an earlier step, the preloaded original, then
        the database (cached into ``self.original_files`` on success). Falls
        back to an empty string when nothing is known about the file.
        """
        if file_path in self.updated_files:
            return self.updated_files[file_path].get("content", "")
        if file_path not in self.original_files:
            fetched = self._fetch_file_content(file_path)
            if fetched is not None:
                self.original_files[file_path] = fetched
        return self.original_files.get(file_path, "")

    async def _process_ai_question(self, item: Dict[str, Any]) -> Dict[str, str]:
        """
        Run one plan step through the LLM code changer.

        Returns:
            A dict with the generated ``content`` and the model's ``reasoning``.
        """
        # Read-only context: the untouched originals this step asked for.
        original_files_list = [
            {"file_path": path, "content": self.original_files.get(path, "")}
            for path in (item.get("original_files") or [])
        ]

        # Everything changed by earlier steps, so the model can build on it.
        updated_files_list = [
            {"file_path": path, "content": data["content"], "reasoning": data["reasoning"]}
            for path, data in self.updated_files.items()
        ]

        instruction = item.get("change_instruction")
        file_path_to_change = item.get("file_path")
        # The raw plan doubles as the "overall plan" given to the model.
        overall_plan = self.input_data

        with dspy.context(lm=self.llm_provider):
            content, reasoning = await self.code_changer.forward(
                overall_plan=overall_plan,
                instruction=instruction,
                filepath=file_path_to_change,
                original_files=original_files_list,
                updated_files=updated_files_list
            )
        return {"content": content, "reasoning": reasoning}

    async def _handle_thinking_log(self, websocket: "WebSocket", reasoning: str) -> None:
        """Send one step's reasoning to the client as a ``thinking_log`` message."""
        client_log: Dict[str, Any] = {
            "type": "thinking_log",
            "content": reasoning
        }
        await websocket.send_text(json.dumps(client_log))

    async def _handle_intermediate_chat_message(self, websocket: "WebSocket") -> None:
        """Send the numbered execution plan to the client before running it."""
        md_content_parts = ["**AI-Generated Execution Plan:**"]
        md_content_parts.extend(
            f"{number}. {data['change_instruction']}"
            for number, data in enumerate(self.parsed_data, start=1)
        )
        # Leading newline leaves a blank line between the list and the footer.
        md_content_parts.append("\n*Start executing...*")

        client_log: Dict[str, Any] = {
            "type": "code_change",
            "content": "\n".join(md_content_parts),
            "done": False,
        }
        await websocket.send_text(json.dumps(client_log))

    async def _post_process(self) -> Dict[str, Dict[str, str]]:
        """
        Pair every changed file with its original content.

        Returns:
            ``{file_path: {"old": ..., "new": ..., "reasoning": ...}}`` where
            ``old`` is an empty string for files with no cached original.
        """
        return {
            file_path: {
                "old": self.original_files.get(file_path, ""),
                "new": detail.get("content", ""),
                "reasoning": detail.get("reasoning", ""),
            }
            for file_path, detail in self.updated_files.items()
        }

    def _apply_move(self, filepath: str, change_instruction: str) -> str:
        """
        Record a file move and return its reasoning ("" if nothing was done).

        The target path is the last backtick-quoted string in
        ``change_instruction``. The source is recorded as emptied and the
        target receives the source's latest known content.
        """
        matches = re.findall(r'`(.*?)`', change_instruction)
        if not matches:
            logger.warning("Move step for '%s' names no backticked target path; skipping.", filepath)
            return ""

        targetpath = matches[-1]
        if targetpath == filepath:
            # Moving a file onto itself must not blank its content.
            logger.warning("Move step for '%s' targets the same path; skipping.", filepath)
            return ""

        reasoning = f"File moved from {filepath} to {targetpath}"
        # The source is usually an untouched original, so it may be absent from
        # `updated_files`; resolve its content instead of indexing directly.
        content = self._resolve_current_content(filepath)
        self.updated_files[targetpath] = {"content": content, "reasoning": reasoning}
        self.updated_files[filepath] = {"content": "", "reasoning": reasoning}
        return reasoning

    async def process(self, websocket: "WebSocket") -> Dict[str, Dict[str, str]]:
        """
        Execute all code change instructions in sequence.

        The plan summary is sent first; after each step that produced
        reasoning, that reasoning is streamed to the client.

        Returns:
            A dictionary mapping each touched file path to its ``old``
            content, ``new`` content and ``reasoning``.
        """
        await self._handle_intermediate_chat_message(websocket)
        for item in self.parsed_data:
            action = item.get("action")
            filepath = item.get("file_path")
            reasoning = ""
            if action == "delete":
                reasoning = f"File deleted: {filepath}"
                self.updated_files[filepath] = {"content": "", "reasoning": reasoning}
            elif action == "move":
                reasoning = self._apply_move(filepath, item.get("change_instruction") or "")
            else:
                # "create", "modify" and any unrecognised action go to the model.
                response = await self._process_ai_question(item)
                self.updated_files[filepath] = response
                reasoning = response.get("reasoning", "")
                # NOTE(review): prints the last LLM exchange to stdout; looks
                # like a debugging aid - confirm it should ship.
                dspy.inspect_history(n=1)
            if reasoning:
                await self._handle_thinking_log(websocket, reasoning)
        return await self._post_process()
List[Dict[str, Any]]: + # """ + # Parses the diff, retrieves original file content, and returns a structured, + # per-file dictionary for the client. + # """ + # # Normalize the diff string to ensure consistent splitting, handling cases where + # # the separator may be missing a leading newline. + # normalized_diff = re.sub(r'(? Optional[List[str]]: @@ -348,89 +351,89 @@ return retrieved_files - def _format_diff(self, raw_diff: str) -> str: - # Remove Markdown-style code block markers - content = re.sub(r'^```diff\n|```$', '', raw_diff.strip(), flags=re.MULTILINE) + # def _format_diff(self, raw_diff: str) -> str: + # # Remove Markdown-style code block markers + # content = re.sub(r'^```diff\n|```$', '', raw_diff.strip(), flags=re.MULTILINE) - # Unescape common sequences - content = content.encode('utf-8').decode('unicode_escape') + # # Unescape common sequences + # content = content.encode('utf-8').decode('unicode_escape') - return content + # return content - def _apply_diff(self, original_content: str, file_diff: str) -> str: - """ - Applies a unified diff to the original content and returns the new content. + # def _apply_diff(self, original_content: str, file_diff: str) -> str: + # """ + # Applies a unified diff to the original content and returns the new content. - Args: - original_content: The original file content as a single string. - file_diff: The unified diff string. + # Args: + # original_content: The original file content as a single string. + # file_diff: The unified diff string. - Returns: - The new content with the diff applied. - """ - # Handle the case where the original content is empty. - if not original_content: - new_content: List[str] = [] - for line in file_diff.splitlines(keepends=True): - if line.startswith('+') and not line.startswith('+++'): - new_content.append(line[1:]) - return ''.join(new_content) + # Returns: + # The new content with the diff applied. + # """ + # # Handle the case where the original content is empty. 
+ # if not original_content: + # new_content: List[str] = [] + # for line in file_diff.splitlines(keepends=True): + # if line.startswith('+') and not line.startswith('+++'): + # new_content.append(line[1:]) + # return ''.join(new_content) - original_lines = original_content.splitlines(keepends=True) - diff_lines = file_diff.splitlines(keepends=True) + # original_lines = original_content.splitlines(keepends=True) + # diff_lines = file_diff.splitlines(keepends=True) - i = 0 - new_content: List[str] = [] - orig_idx = 0 + # i = 0 + # new_content: List[str] = [] + # orig_idx = 0 - while i < len(diff_lines): - # Skip diff headers like --- and +++ - if diff_lines[i].startswith('---') or diff_lines[i].startswith('+++'): - i += 1 - continue + # while i < len(diff_lines): + # # Skip diff headers like --- and +++ + # if diff_lines[i].startswith('---') or diff_lines[i].startswith('+++'): + # i += 1 + # continue - # Hunk header - if not diff_lines[i].startswith('@@'): - i += 1 - continue + # # Hunk header + # if not diff_lines[i].startswith('@@'): + # i += 1 + # continue - hunk_header = diff_lines[i] - m = re.match(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@', hunk_header) - if not m: - raise ValueError(f"Invalid hunk header: {hunk_header.strip()}") + # hunk_header = diff_lines[i] + # m = re.match(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@', hunk_header) + # if not m: + # raise ValueError(f"Invalid hunk header: {hunk_header.strip()}") - orig_start = int(m.group(1)) - 1 # convert from 1-based to 0-based index - i += 1 + # orig_start = int(m.group(1)) - 1 # convert from 1-based to 0-based index + # i += 1 - # Copy unchanged lines before this hunk - while orig_idx < orig_start: - new_content.append(original_lines[orig_idx]) - orig_idx += 1 + # # Copy unchanged lines before this hunk + # while orig_idx < orig_start: + # new_content.append(original_lines[orig_idx]) + # orig_idx += 1 - # Process lines in hunk - while i < len(diff_lines): - line = diff_lines[i] + # # Process lines in hunk + # 
while i < len(diff_lines): + # line = diff_lines[i] - if line.startswith('@@'): - # Start of next hunk - break - elif line.startswith(' '): - # Context line - if orig_idx < len(original_lines): - new_content.append(original_lines[orig_idx]) - orig_idx += 1 - elif line.startswith('-'): - # Removed line - orig_idx += 1 - elif line.startswith('+'): - # Added line - new_content.append(line[1:]) - i += 1 + # if line.startswith('@@'): + # # Start of next hunk + # break + # elif line.startswith(' '): + # # Context line + # if orig_idx < len(original_lines): + # new_content.append(original_lines[orig_idx]) + # orig_idx += 1 + # elif line.startswith('-'): + # # Removed line + # orig_idx += 1 + # elif line.startswith('+'): + # # Added line + # new_content.append(line[1:]) + # i += 1 - # Add remaining lines from original - new_content.extend(original_lines[orig_idx:]) + # # Add remaining lines from original + # new_content.extend(original_lines[orig_idx:]) - return ''.join(new_content) + # return ''.join(new_content) async def send_command(self, websocket: WebSocket, command_name: str, data: Dict[str, Any] = {}): @@ -507,49 +510,55 @@ })) return await self._store_retrieved_files(request_id=uuid.UUID(request_id), files=files) - - session = self.db.query(models.Session).options( - joinedload(models.Session.messages) - ).filter(models.Session.id == file_request.session_id).first() + await self.handle_files_content_response(websocket, {"files": [], "request_id": request_id, "session_id": file_request.session_id}) - provider_name = data.get("provider_name", "gemini") - llm_provider = get_llm_provider(provider_name) - cfs = CodeRagFileSelector() - retrieved_data = await self._retrieve_by_request_id(self.db, request_id=request_id) - with dspy.context(lm=llm_provider): - raw_answer_text = await cfs( - question=file_request.question, - retrieved_data=retrieved_data, - history=session.messages - ) - try: - # Use ast.literal_eval for a safe and reliable parse - answer_text = 
ast.literal_eval(raw_answer_text) - except (ValueError, SyntaxError) as e: - # Handle cases where the LLM output is not a valid list string. - print(f"Error parsing LLM output: {e}") - answer_text = [] # Default to an empty list to prevent errors. - await websocket.send_text(json.dumps({ - "type": "thinking_log", - "content": f"Warning: AI's file list could not be parsed. Error: {e}" - })) - return + # session = self.db.query(models.Session).options( + # joinedload(models.Session.messages) + # ).filter(models.Session.id == file_request.session_id).first() + + # provider_name = data.get("provider_name", "gemini") + # llm_provider = get_llm_provider(provider_name) + # cfs = CodeRagFileSelector() + # retrieved_data = await self._retrieve_by_request_id(self.db, request_id=request_id) + # with dspy.context(lm=llm_provider): + # raw_answer_text ,reasoning = await cfs( + # question=file_request.question, + # retrieved_data=retrieved_data, + # history=session.messages + # ) + # assistant_message = models.Message(session_id=file_request.session_id, sender="assistant", content=f'${reasoning}: ${raw_answer_text}') + # self.db.add(assistant_message) + # self.db.commit() + # dspy.inspect_history(n=1) # Inspect the last DSPy operation for debugging + + # try: + # # Use ast.literal_eval for a safe and reliable parse + # answer_text = ast.literal_eval(raw_answer_text) + # except (ValueError, SyntaxError) as e: + # # Handle cases where the LLM output is not a valid list string. + # print(f"Error parsing LLM output: {e}") + # answer_text = [] # Default to an empty list to prevent errors. + # await websocket.send_text(json.dumps({ + # "type": "thinking_log", + # "content": f"Warning: AI's file list could not be parsed. Error: {e}" + # })) + # return - if len(answer_text) == 0: - await websocket.send_text(json.dumps({ - "type": "thinking_log", - "content": "AI did not select any files to retrieve content for." 
- })) - await self.handle_files_content_response(websocket, {"files": [], "request_id": request_id, "session_id": file_request.session_id}) - return + # if len(answer_text) == 0: + # await websocket.send_text(json.dumps({ + # "type": "thinking_log", + # "content": "AI did not select any files to retrieve content for." + # })) + # await self.handle_files_content_response(websocket, {"files": [], "request_id": request_id, "session_id": file_request.session_id}) + # return - await websocket.send_text(json.dumps({ - "type": "thinking_log", - "content": f"AI selected files: {answer_text}. Now requesting file content." - })) + # await websocket.send_text(json.dumps({ + # "type": "thinking_log", + # "content": f"AI selected files: {answer_text}. Now requesting file content." + # })) - # After getting the AI's selected files, we send a command to the client to get their content. - await self.send_command(websocket, "get_file_content", data={"filepaths": answer_text, "request_id": request_id}) + # # After getting the AI's selected files, we send a command to the client to get their content. + # await self.send_command(websocket, "get_file_content", data={"filepaths": answer_text, "request_id": request_id}) async def handle_files_content_response(self, websocket: WebSocket, data: Dict[str, Any]): """ @@ -559,12 +568,13 @@ request_id = data.get("request_id") session_id = data.get("session_id") - if not files_data: - print(f"Warning: No files data received for request_id: {request_id}") - else: - print(f"Received content for {len(files_data)} files (request_id: {request_id}).") - await self._update_file_content(request_id=uuid.UUID(request_id), files_with_content=files_data) - + if not request_id: + await websocket.send_text(json.dumps({ + "type": "error", + "content": "Error: request_id is required to process file content." 
+ })) + return + if not session_id: await websocket.send_text(json.dumps({ "type": "error", @@ -572,12 +582,11 @@ })) return - if not request_id: - await websocket.send_text(json.dumps({ - "type": "error", - "content": "Error: request_id is required to process file content." - })) - return + if not files_data: + print(f"Warning: No files data received for request_id: {request_id}") + else: + print(f"Received content for {len(files_data)} files (request_id: {request_id}).") + await self._update_file_content(request_id=uuid.UUID(request_id), files_with_content=files_data) # Retrieve the updated context from the database context_data = await self._retrieve_by_request_id(self.db, request_id=request_id) @@ -589,6 +598,7 @@ "content": "An internal error occurred. Please try again." })) return + await websocket.send_text(json.dumps({ "type": "thinking_log", "content": f"AI is analyzing the retrieved files to determine next steps." @@ -601,37 +611,39 @@ # Use the LLM to make a decision with dspy.context(lm=get_llm_provider(provider_name="gemini")): crqd = CodeRagQuestionDecider() - raw_answer_text, reasoning, decision, code_diff = await crqd( + raw_answer_text, reasoning, decision = await crqd( question=context_data.get("question", ""), history=session.messages, retrieved_data=context_data ) - dspy.inspect_history(n=1) # Inspect the last DSPy operation for debugging - if decision in [ "code_change", "answer"]: + dspy.inspect_history(n=1) + + if decision == "answer": + # Handle regular answer assistant_message = models.Message(session_id=session_id, sender="assistant", content=raw_answer_text) self.db.add(assistant_message) self.db.commit() self.db.refresh(assistant_message) + await websocket.send_text(json.dumps({ + "type": "chat_message", + "content": raw_answer_text, + "reasoning": reasoning + })) - if decision == "files": + elif decision == "files": + # Handle file retrieval request await websocket.send_text(json.dumps({ "type": "thinking_log", - "content": f"AI decided 
more files are needed: {raw_answer_text}." + "content": f"AI decided files are needed: {raw_answer_text}." })) try: - # Use regex to find the JSON content, including any surrounding newlines and code blocks json_match = re.search(r'\[.*\]', raw_answer_text, re.DOTALL) if json_match: - # Extract the matched JSON string json_string = json_match.group(0) - - # Use ast.literal_eval for a safe and reliable parse answer_text = ast.literal_eval(json_string) - if not isinstance(answer_text, list): raise ValueError("Parsed result is not a list.") else: - # Fallback if no markdown is found answer_text = ast.literal_eval(raw_answer_text) if not isinstance(answer_text, list): raise ValueError("Parsed result is not a list.") @@ -643,24 +655,44 @@ "content": f"Warning: AI's file list could not be parsed. Error: {e}" })) return - + assistant_message = models.Message(session_id=session_id, sender="assistant", content=f"{reasoning}\n Request Files: {answer_text} ") + self.db.add(assistant_message) + self.db.commit() + self.db.refresh(assistant_message) await self.send_command(websocket, "get_file_content", data={"filepaths": answer_text, "request_id": request_id}) elif decision == "code_change": - diffs =await self._handle_code_change_response(db=self.db, request_id=request_id, code_diff=code_diff) - for diff in diffs: - diff["diff"] = self._format_diff(diff.get("diff","")) - payload = json.dumps({ - "type": "chat_message", - "content": raw_answer_text, - "reasoning": reasoning, - "dicision" : decision, - "code_diff":diffs - }) - logger.info(f"Sending code change response to client: {payload}") - await websocket.send_text(payload) + # Handle code change request + await websocket.send_text(json.dumps({ + "type": "thinking_log", + "content": "AI is generating the necessary code changes. This may take a moment." 
+ })) - else: # decision is "answer" + try: + # The input_data is a JSON string of code change instructions + cch = CodeChangeHelper(db=self.db, provider_name="gemini", input_data=raw_answer_text) + + # Use the CodeChangeHelper to process all code changes + final_changes = await cch.process(websocket=websocket) + + # Send the final processed changes to the client + payload = json.dumps({ + "type": "code_change", + "code_changes": final_changes, + "content": "Completed all requested code changes.", + "done": True + }) + logger.info(f"Sending code change response to client: {payload}") + await websocket.send_text(payload) + + except (json.JSONDecodeError, ValueError) as e: + logger.error(f"Error processing code changes: {e}") + await websocket.send_text(json.dumps({ + "type": "error", + "content": f"Failed to process code change request. Error: {e}" + })) + + else: # Fallback for any other decision await websocket.send_text(json.dumps({ "type": "thinking_log", "content": f"Answering user's question directly." @@ -670,6 +702,7 @@ "content": raw_answer_text, "reasoning": reasoning })) + async def handle_command_output(self, websocket: WebSocket, data: Dict[str, Any]): """Handles the output from a command executed by the client.""" diff --git a/setup.sh b/setup.sh new file mode 100644 index 0000000..fc2529f --- /dev/null +++ b/setup.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Check the operating system +OS="$(uname -s)" + +# --- Setup for macOS --- +if [ "$OS" == "Darwin" ]; then + echo "Detected macOS. Using Homebrew for setup." + + # Check for Homebrew, install if not present + if ! command -v brew &> /dev/null; then + echo "Homebrew not found. Please install it first:" + echo '/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"' + exit 1 + fi + + echo "Updating Homebrew..." + brew update + + echo "Installing Node.js..." 
+ brew install node + +# --- Setup for Linux (Debian/Ubuntu) --- +elif [ "$OS" == "Linux" ]; then + echo "Detected Linux. Assuming Debian/Ubuntu-based system for setup." + + # Update package list + sudo apt-get update + + # Install curl if not installed + if ! command -v curl &> /dev/null; then + sudo apt-get install -y curl + fi + + # Download and run NodeSource setup script for Node.js 18.x + curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash - + + # Install Node.js + sudo apt-get install -y nodejs + +else + echo "Unsupported operating system: $OS" + exit 1 +fi + +# --- Common Steps --- +# Check versions installed +node -v +npm -v +pip install -e ./ai-hub + +# Install concurrently globally +echo "Installing concurrently globally..." +npm install -g concurrently + +echo "Setup complete!" \ No newline at end of file diff --git a/ui/client-app/package-lock.json b/ui/client-app/package-lock.json index 8cfa3e9..6c7b805 100644 --- a/ui/client-app/package-lock.json +++ b/ui/client-app/package-lock.json @@ -1,11 +1,11 @@ { - "name": "tts-client-app", + "name": "client-app", "version": "0.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "tts-client-app", + "name": "client-app", "version": "0.1.0", "dependencies": { "@tailwindcss/cli": "^4.1.11", @@ -13,6 +13,8 @@ "@testing-library/jest-dom": "^6.6.4", "@testing-library/react": "^16.3.0", "@testing-library/user-event": "^13.5.0", + "diff": "^8.0.2", + "diff2html": "^3.4.52", "react": "^19.1.1", "react-dom": "^19.1.1", "react-icons": "^5.5.0", @@ -4990,6 +4992,11 @@ "deprecated": "Use your platform's native atob() and btoa() methods instead", "license": "BSD-3-Clause" }, + "node_modules/abbrev": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", + "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==" + }, "node_modules/accepts": { "version": "1.3.8", "resolved": 
"https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -7425,6 +7432,14 @@ "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==", "license": "Apache-2.0" }, + "node_modules/diff": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-8.0.2.tgz", + "integrity": "sha512-sSuxWU5j5SR9QQji/o2qMvqRNYRDOcBTgsJ/DeCf4iSN4gW+gNMXM7wFIP+fdXZxoNiAnHUTGjCr+TSWXdRDKg==", + "engines": { + "node": ">=0.3.1" + } + }, "node_modules/diff-sequences": { "version": "27.5.1", "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-27.5.1.tgz", @@ -7434,6 +7449,29 @@ "node": "^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0" } }, + "node_modules/diff2html": { + "version": "3.4.52", + "resolved": "https://registry.npmjs.org/diff2html/-/diff2html-3.4.52.tgz", + "integrity": "sha512-qhMg8/I3sZ4zm/6R/Kh0xd6qG6Vm86w6M+C9W+DuH1V8ACz+1cgEC8/k0ucjv6AGqZWzHm/8G1gh7IlrUqCMhg==", + "dependencies": { + "diff": "^7.0.0", + "hogan.js": "3.0.2" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "highlight.js": "11.9.0" + } + }, + "node_modules/diff2html/node_modules/diff": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-7.0.0.tgz", + "integrity": "sha512-PJWHUb1RFevKCwaFA9RlG5tCd+FO5iRh9A8HEtkmBH2Li03iJriB6m6JIN4rGz3K3JLawI7/veA1xzRKP6ISBw==", + "engines": { + "node": ">=0.3.1" + } + }, "node_modules/dir-glob": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", @@ -9679,6 +9717,36 @@ "he": "bin/he" } }, + "node_modules/highlight.js": { + "version": "11.9.0", + "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.9.0.tgz", + "integrity": "sha512-fJ7cW7fQGCYAkgv4CPfwFHrfd/cLS4Hau96JuJ+ZTOWhjnhoeN1ub1tFmALm/+lW5z4WCAuAV9bm05AP0mS6Gw==", + "optional": true, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/hogan.js": { + "version": "3.0.2", + "resolved": 
"https://registry.npmjs.org/hogan.js/-/hogan.js-3.0.2.tgz", + "integrity": "sha512-RqGs4wavGYJWE07t35JQccByczmNUXQT0E12ZYV1VKYu5UiAU9lsos/yBAcf840+zrUQQxgVduCR5/B8nNtibg==", + "dependencies": { + "mkdirp": "0.3.0", + "nopt": "1.0.10" + }, + "bin": { + "hulk": "bin/hulk" + } + }, + "node_modules/hogan.js/node_modules/mkdirp": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.3.0.tgz", + "integrity": "sha512-OHsdUcVAQ6pOtg5JYWpCBo9W/GySVuwvP9hueRMW7UqshC0tbfzLv8wjySTPm3tfUZ/21CE9E1pJagOA91Pxew==", + "deprecated": "Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.)", + "engines": { + "node": "*" + } + }, "node_modules/hoopy": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/hoopy/-/hoopy-0.1.4.tgz", @@ -13317,6 +13385,20 @@ "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==", "license": "MIT" }, + "node_modules/nopt": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-1.0.10.tgz", + "integrity": "sha512-NWmpvLSqUrgrAC9HCuxEvb+PSloHpqVu+FqcO4eeF2h5qYRhA7ev6KvelyQAKtegUbC6RypJnlEOhd8vloNKYg==", + "dependencies": { + "abbrev": "1" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": "*" + } + }, "node_modules/normalize-path": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", diff --git a/ui/client-app/package.json b/ui/client-app/package.json index e404eac..0463eb8 100644 --- a/ui/client-app/package.json +++ b/ui/client-app/package.json @@ -1,5 +1,5 @@ { - "name": "tts-client-app", + "name": "client-app", "version": "0.1.0", "private": true, "dependencies": { @@ -8,6 +8,8 @@ "@testing-library/jest-dom": "^6.6.4", "@testing-library/react": "^16.3.0", "@testing-library/user-event": "^13.5.0", + "diff": "^8.0.2", + "diff2html": "^3.4.52", "react": "^19.1.1", "react-dom": 
"^19.1.1", "react-icons": "^5.5.0", diff --git a/ui/client-app/src/components/ChatArea.css b/ui/client-app/src/components/ChatArea.css index 4b4c6d6..312e7ae 100644 --- a/ui/client-app/src/components/ChatArea.css +++ b/ui/client-app/src/components/ChatArea.css @@ -1,3 +1,3 @@ .chat-area-fixed-height { height: calc(100vh - 72px); /* Subtract input + padding */ - } \ No newline at end of file + } diff --git a/ui/client-app/src/components/ChatWindow.css b/ui/client-app/src/components/ChatWindow.css index a573ef5..c356690 100644 --- a/ui/client-app/src/components/ChatWindow.css +++ b/ui/client-app/src/components/ChatWindow.css @@ -19,4 +19,70 @@ word-wrap: break-word; /* Prevents overflow */ line-height: 1.5; color: #333; - } \ No newline at end of file + } + +/* + This CSS file provides styles for an ordered list to make it + look more like a step-by-step guide with custom numbering, + styled to match the provided file list component. +*/ + +/* Container for the list */ +ol { + list-style: none; + padding: 0; + counter-reset: step-counter; + margin: 1rem 0; + font-family: 'Inter', sans-serif; +} + +/* Style for each list item, simulating the file list item's look */ +ol li { + background-color: #f7f9fc; + border: 1px solid #e0e6ed; + border-radius: 8px; + padding: 1rem; + margin-bottom: 0.75rem; + display: flex; + align-items: flex-start; + gap: 1rem; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); + transition: transform 0.2s ease, box-shadow 0.2s ease; + cursor: pointer; /* Added a cursor pointer to make it feel clickable */ +} + +/* Add a hover effect to match the file list */ +ol li:hover { + background-color: #e6e9ef; /* Adjusted hover color to be slightly darker */ + transform: translateY(-2px); + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); +} + +/* The magic for the step prefix using a pseudo-element */ +ol li::before { + content: "Step " counter(step-counter) "."; /* Added a period for clarity */ + counter-increment: step-counter; + font-size: 0.9rem; + 
font-weight: bold; + color: #3b82f6; /* Used the vibrant blue from the icon */ + flex-shrink: 0; +} + +/* Styling for the text content within the list item */ +ol li p { + margin: 0; + font-size: 0.9rem; + line-height: 1.5; + color: #333; +} + + +.d2h-code-line, +.d2h-code-side-line { + position: static !important; +} + +/* Optional: Prevent sticky file headers */ +.d2h-file-header { + position: static !important; +} \ No newline at end of file diff --git a/ui/client-app/src/components/ChatWindow.js b/ui/client-app/src/components/ChatWindow.js index ce79bc1..36cf1d2 100644 --- a/ui/client-app/src/components/ChatWindow.js +++ b/ui/client-app/src/components/ChatWindow.js @@ -44,10 +44,10 @@ )} {message.text} - {message.code_diff && ( - + {message.code_changes && ( + )} - {selectedFile && } + {selectedFile && } ); }; diff --git a/ui/client-app/src/components/DiffViewer.css b/ui/client-app/src/components/DiffViewer.css new file mode 100644 index 0000000..5c24a89 --- /dev/null +++ b/ui/client-app/src/components/DiffViewer.css @@ -0,0 +1,8 @@ +.d2h-code-side-linenumber, +.d2h-code-linenumber, +.d2h-info, +.d2h-file-header { + position: static !important; + background: inherit !important; + z-index: auto !important; +} \ No newline at end of file diff --git a/ui/client-app/src/components/DiffViewer.js b/ui/client-app/src/components/DiffViewer.js index 156839b..ff6d979 100644 --- a/ui/client-app/src/components/DiffViewer.js +++ b/ui/client-app/src/components/DiffViewer.js @@ -1,40 +1,118 @@ -// FileListComponent.jsx -import React from "react"; +import React, { useState, useEffect } from "react"; +import { createTwoFilesPatch } from 'diff'; +import { parse, html } from 'diff2html'; +import 'diff2html/bundles/css/diff2html.min.css'; +import './DiffViewer.css'; -const DiffViewer = ({ diff, onClose }) => { - if (!diff) return null; +const DiffViewer = ({ oldContent, newContent, filePath, onClose }) => { + const [diffHtml, setDiffHtml] = useState(""); + const [isDropdownOpen, 
setIsDropdownOpen] = useState(false); - const lines = diff.split('\n'); + useEffect(() => { + // Generate the unified diff format + const generatedDiff = createTwoFilesPatch( + filePath, + filePath, + oldContent, + newContent, + 'old version', + 'new version' + ); - const handleDownload = () => { - // Create a Blob from the diff string - const blob = new Blob([diff], { type: 'text/plain' }); + // Parse the unified diff to an internal data structure + const diffJson = parse(generatedDiff); + + // Use diff2html to generate the side-by-side HTML + const configuration = { + drawFileList: true, + matching: 'lines', + outputFormat: 'side-by-side' // Crucial for side-by-side view + }; + const htmlOutput = html(diffJson, configuration); + setDiffHtml(htmlOutput); + }, [oldContent, newContent, filePath]); + + if (!diffHtml) return null; + + const handleDownload = (fileType) => { + let content, filename; + + switch (fileType) { + case 'diff': + content = createTwoFilesPatch( + filePath, + filePath, + oldContent, + newContent, + 'old version', + 'new version' + ); + filename = `${filePath.split('/').pop()}_changes.diff`; + break; + case 'old': + content = oldContent; + filename = `${filePath.split('/').pop()}_old.txt`; + break; + case 'new': + content = newContent; + filename = `${filePath.split('/').pop()}_new.txt`; + break; + default: + return; + } + + const blob = new Blob([content], { type: 'text/plain' }); const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; - a.download = 'file_changes.diff'; // Specify a default filename + a.download = filename; document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); + setIsDropdownOpen(false); // Close the dropdown after download }; return (
-
+
-

File Changes

+

Changes in {filePath}

- {/* Download Button */} - - {/* Close Button */} +
+ + {isDropdownOpen && ( +
+
+ + + +
+
+ )} +
-
-          {lines.map((line, index) => {
-            let color = 'text-gray-900 dark:text-gray-100';
-            if (line.startsWith('+')) {
-              color = 'text-green-600 dark:text-green-400';
-            } else if (line.startsWith('-')) {
-              color = 'text-red-600 dark:text-red-400';
-            }
-            return (
-              
- {line} -
- ); - })} -
+
); diff --git a/ui/client-app/src/components/FileList.js b/ui/client-app/src/components/FileList.js index c4f9de2..80c6d9c 100644 --- a/ui/client-app/src/components/FileList.js +++ b/ui/client-app/src/components/FileList.js @@ -1,12 +1,17 @@ // FileListComponent.jsx import React from "react"; +import ReactMarkdown from 'react-markdown'; -const FileListComponent = ({ files, onFileClick }) => { +const FileListComponent = ({ code_changes, onFileClick }) => { + const filesArray = code_changes ? Object.keys(code_changes).map(filepath => ({ + filepath, + ...code_changes[filepath] + })) : []; + return (
Files:
- {files.map((file, index) => ( - // Once clicked, it calls onFileClick , it has file.diff and file.original_content and file.new_content , use the diff to show the changes nicely + {filesArray.map((file, index) => (
{file.filepath}
+
+ {file.reasoning} +
))}
diff --git a/ui/client-app/src/components/InteractionLog.js b/ui/client-app/src/components/InteractionLog.js index 517c27e..4d2f31f 100644 --- a/ui/client-app/src/components/InteractionLog.js +++ b/ui/client-app/src/components/InteractionLog.js @@ -1,4 +1,5 @@ import React, { useState, useEffect, useRef } from "react"; +import ReactMarkdown from 'react-markdown'; const InteractionLog = ({ logs }) => { const [expandedLogs, setExpandedLogs] = useState({}); @@ -68,9 +69,7 @@

{log.type.charAt(0).toUpperCase() + log.type.slice(1)}:

-
-                  {isExpanded ? log.message : getPreviewText(log.message)}
-                
+ {isExpanded ? log.message : getPreviewText(log.message)} {!isExpanded && log.message.length > 200 && (

Click to expand diff --git a/ui/client-app/src/hooks/useCodeAssistant.js b/ui/client-app/src/hooks/useCodeAssistant.js index cb9451b..a8bfcf9 100644 --- a/ui/client-app/src/hooks/useCodeAssistant.js +++ b/ui/client-app/src/hooks/useCodeAssistant.js @@ -24,12 +24,26 @@ isUser: false, text: message.content, dicision: message.dicision, - code_diff: message.code_diff, reasoning: message.reasoning }]); setIsProcessing(false); }, []); + const handleCodeChange = useCallback((message) => { + console.log("Received code change:", message); + setChatHistory((prev) => [...prev, { + isUser: false, + text: message.content, + code_changes: message.code_changes + }]); + if (message.done === true){ + setIsProcessing(false); + } else{ + setIsProcessing(true); + } + }, []); + + const handleThinkingLog = useCallback((message) => { setThinkingProcess((prev) => [...prev, { type: "remote", @@ -209,6 +223,9 @@ case "chat_message": handleChatMessage(message); break; + case "code_change": + handleCodeChange(message); + break; case "thinking_log": handleThinkingLog(message); break; @@ -232,6 +249,7 @@ } }, [ handleChatMessage, + handleCodeChange, handleThinkingLog, handleError, handleStatusUpdate, diff --git a/ui/client-app/src/services/websocket.js b/ui/client-app/src/services/websocket.js index 2dd1b1e..910534b 100644 --- a/ui/client-app/src/services/websocket.js +++ b/ui/client-app/src/services/websocket.js @@ -40,7 +40,7 @@ // NOTE: The line `sessionId = null;` has been removed as it was for testing purposes // and would force a new session on every connection. - + sessionId = null if (!sessionId) { // No existing session, so create one via API const session = await createSession(); @@ -57,7 +57,14 @@ // Use `wss` for `https` and `ws` for `http`. const url = new URL(API_BASE_URL); const wsProtocol = url.protocol === "https:" ? 
"wss" : "ws"; - const websocketUrl = `${wsProtocol}://${url.host}${url.pathname}/ws/workspace/${sessionId}`; + + let pathname = url.pathname; + // Check if the pathname is just a single slash, if so, don't append it to avoid a double slash + if (pathname === "/") { + pathname = ""; + } + + const websocketUrl = `${wsProtocol}://${url.host}${pathname}/ws/workspace/${sessionId}`; console.log("Connecting to WebSocket URL:", websocketUrl); const ws = new WebSocket(websocketUrl); diff --git a/ui/run_web.sh b/ui/run_web.sh index 901ce81..3ce6bf4 100644 --- a/ui/run_web.sh +++ b/ui/run_web.sh @@ -18,7 +18,7 @@ # Resolve script directory SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" AI_HUB_DIR="$(realpath "$SCRIPT_DIR/../ai-hub")" -TTS_CLIENT_DIR="$SCRIPT_DIR/client-app" +CLIENT_DIR="$SCRIPT_DIR/client-app" AI_HUB_HOST="0.0.0.0" AI_HUB_PORT="8001" @@ -70,6 +70,12 @@ FRONTEND_ENV="HTTPS=true" fi +# New step: Install frontend dependencies +echo "--- Installing frontend dependencies ---" +pushd "$CLIENT_DIR" > /dev/null +npm install +popd > /dev/null + echo "--- Starting AI Hub Server, React frontend, and backend proxy ---" # Run backend and frontend concurrently @@ -77,6 +83,6 @@ --prefix "[{name}]" \ --names "aihub,tts-frontend" \ "LOG_LEVEL=DEBUG uvicorn $APP_MODULE --host $AI_HUB_HOST --log-level debug --port $AI_HUB_PORT $SSL_ARGS --reload" \ - "cd $TTS_CLIENT_DIR && $FRONTEND_ENV HOST=0.0.0.0 PORT=8000 npm start" + "cd $CLIENT_DIR && $FRONTEND_ENV HOST=0.0.0.0 PORT=8000 npm start" popd > /dev/null diff --git a/ui/setup.sh b/ui/setup.sh deleted file mode 100644 index cfb2cd3..0000000 --- a/ui/setup.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# Update package list -sudo apt-get update - -# Install curl if not installed -sudo apt-get install -y curl - -# Download and run NodeSource setup script for Node.js 18.x (latest LTS) -curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash - - -# Install Node.js (includes npm) -sudo apt-get install -y 
nodejs - -# Check versions installed -node -v -npm -v - -npm install -g concurrently