# cortex-hub / ai-hub / app / core / pipelines / code_changer.py
import dspy
import json
import os
from typing import List, Dict, Any, Tuple, Optional, Callable


class CodeChanger(dspy.Signature):
    """
    ### 🧠 Core Directives

    You are a code generation assistant specialized in producing **one precise and complete code change** per instruction. Your output must be a strict JSON object containing:

    - `reasoning`: A concise explanation of the change.
    - `content`: The **full content of the file** (or an empty string for deletions).

    No extra output is allowed—**only the JSON object.**

    ---

    ### 1. Input Structure

    You will be provided:

    - `instruction`: A plain-text directive specifying the desired code change.
    - `original_files`: A list of unmodified files from the codebase:
    ```json
    [
        {
        "file_path": "/app/main.py",
        "content": "# main.py\\n\\ndef new_function():\\n    pass\\n\\nnew_function()\\n"
        },
        ...
    ]
    ````

    - `updated_files`: Files already modified in prior steps:

    <!-- end list -->

    ```json
    [
        {
        "file_path": "/app/main.py",
        "reasoning": "...",
        "content": "..."
        },
        ...
    ]
    ```

    -----

    ### 2\. Code Generation Rules

    Your task is to output **one full-file code change** for the specified `file_path`, using the instruction and file context.


    ### **Code Quality Requirements**

    * **No placeholders or pseudocode.** All code must be complete, functional, and ready to execute.
    * **Provide the full code.** When making changes, always output the entire, modified file. Do not use abbreviations, placeholders, or `TODO` comments.
    * **Ensure code integrity.** The updated code, when combined with the existing project files, must form a seamless, complete, and fully executable codebase. It should be both readable and extensible.
    * **Include all dependencies and imports.** The code must be self-contained and immediately executable without requiring the user to add missing imports.
    * **All definitions must be resolvable.** Ensure that all functions, variables, and return values are defined within the scope of the provided file.
    * **Modular and well-structured.** The code should be modular, logically organized, and include clear, concise comments where necessary.
    * **Follow best practices.** Adhere to good naming conventions and ensure a logical flow throughout the code.

    #### 🔹 Change Types

    * **File Modification**:
        - Provide the entire updated file in `content`.
    * **File Creation**:
        - Include full file content in `content`.
    * **File Deletion**:
        - Set `content` to `""` and explain the deletion in `reasoning`.

    -----

    ### 3\. Output Format

    Return exactly one JSON object:

    ```json
    {
    "reasoning": "Brief explanation of the change.",
    "content": "Full file content here"
    }
    ```

    **Do not output any explanation, headers, or text outside this JSON.**
    """
  
    overall_plan = dspy.InputField(desc="The high-level strategy for the code changes.")
    instruction = dspy.InputField(desc="The specific instruction for this step of the code change.")
    filepath = dspy.InputField(desc="The path of the file to be changed, created, or deleted.")
    
    original_files = dspy.InputField(
        desc="A JSON list of dictionaries with 'file_path' and 'content' for the original files."
    )

    updated_files = dspy.InputField(
        desc="A JSON list of dictionaries with 'file_path' and 'content' for files modified by previous steps."
    )

    reasoning = dspy.OutputField(desc="A detailed reasoning process for the code change.")
    content = dspy.OutputField(desc="The generated code.")


class CodeRagCodeChanger(dspy.Module):
    """
    DSPy module that performs one code-change step.

    It wraps the `CodeChanger` signature in a `dspy.ChainOfThought`
    predictor and exposes a single asynchronous `forward` entry point.
    """

    def __init__(self):
        super().__init__()
        # Predictor driven by the CodeChanger signature.
        self.code_changer = dspy.ChainOfThought(CodeChanger)

    async def forward(
        self,
        overall_plan: str,
        instruction: str,
        filepath: str,
        original_files: List[Dict[str, Any]],
        updated_files: List[Dict[str, Any]]
    ) -> Tuple[str, str]:
        """
        Run the predictor for one step and return its outputs.

        Args:
            overall_plan: High-level strategy, passed through unchanged.
            instruction: Directive for this step, passed through unchanged.
            filepath: Path of the target file, passed through unchanged.
            original_files: File dictionaries; serialized with `json.dumps`.
            updated_files: File dictionaries; serialized with `json.dumps`.

        Returns:
            A `(content, reasoning)` tuple read from the prediction.
        """
        # The two file lists are handed to the predictor as JSON text;
        # every other argument is forwarded as-is.
        model_inputs = {
            "overall_plan": overall_plan,
            "instruction": instruction,
            "filepath": filepath,
            "original_files": json.dumps(original_files),
            "updated_files": json.dumps(updated_files),
        }

        result = await self.code_changer.acall(**model_inputs)

        # Content first, reasoning second — the order callers unpack.
        generated_content, explanation = result.content, result.reasoning
        return generated_content, explanation