diff --git a/ai-hub/app/core/pipelines/code_changer.py b/ai-hub/app/core/pipelines/code_changer.py
index d6b2858..8aed3f8 100644
--- a/ai-hub/app/core/pipelines/code_changer.py
+++ b/ai-hub/app/core/pipelines/code_changer.py
@@ -60,6 +60,7 @@
     * **Identical Code Sections:** Use the `#[unchanged_section]|||` syntax for large, sequential blocks of code that are not being modified. This is the **only permitted syntax** for indicating unchanged code. Do not use this for small or scattered sections.
     * **Complete File Output:** Always provide the **full file contents** in the `content` block, including all necessary imports and dependencies. Do not use placeholders like `...`, or comments such as `# rest of the code`, or `# existing code`.
     * **Imports:** Ensure all required imports are included in the provided file. Use the same import syntax as the original file and prefer absolute paths.
+    * **No Example Execution:** Preserve the original line breaks; do not compress output code onto a single line, as that breaks the Git diff. Avoid adding example execution code such as a `main` function or ad-hoc function calls; use comments instead where possible.

---
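For reference, the placeholder referred to above takes the form `#[unchanged_section]|<file_path>|<start_line>|<end_line>` (the reviewer prompt below gives the example `#[unchanged_section]|/app/main.py|10|24`). Below is a minimal sketch of how it might appear inside a generated `content` block; the file path, line range, and surrounding code are hypothetical.

```python
# Hypothetical generated `content` block for /app/services/user.py. The file is
# emitted in full, except that lines 10-24 of the original file are untouched and
# therefore collapsed into the unchanged-section placeholder.
GENERATED_CONTENT = """\
import logging

from app.services.base import BaseService

#[unchanged_section]|/app/services/user.py|10|24

def deactivate_user(user_id: int) -> None:
    # New helper requested by the change instruction.
    logging.info("Deactivating user %s", user_id)
"""
```

Downstream, the `_inline_code_replacement` method added in the `code_change.py` diff further below expands this marker back into lines 10-24 of the cached original file.

---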
diff --git a/ai-hub/app/core/pipelines/code_reviewer.py b/ai-hub/app/core/pipelines/code_reviewer.py
index 90cb06b..d416ec0 100644
--- a/ai-hub/app/core/pipelines/code_reviewer.py
+++ b/ai-hub/app/core/pipelines/code_reviewer.py
@@ -6,16 +6,20 @@
     """
     ### 🧠 Core Directives
-    ## Code Review Directives
-    Your role is a specialized code review AI. Your primary task is to evaluate a recent set of code changes and confirm if they **fully and accurately address the user's original request**.
-    ### Critical Constraints
-    Your review is strictly limited to **code content completeness**. You are explicitly forbidden from suggesting or performing any file splits, moves, or large-scale refactoring. This constraint is critical to prevent endless rework loops.
-    Pay close attention to any placeholders, comments or notes within the code that indicate a section is "the same as the original", "to-do" or "unchanged," as these areas are your focus for completion.
+    ### **Code Review Directives**
+    Your role is a specialized code review AI. Your primary task is to review a set of code changes and confirm that they **fully and accurately address the user's original request**.
+    ---
+    ### **Critical Constraints**
+    Your review is strictly limited to **code content completeness**. Do not suggest or perform any file splits, moves, or large-scale refactoring.
+    Your sole goal is to ensure the changes are complete. Focus on identifying and resolving any missing logic, placeholders, or incomplete code. Prioritize completion over refactoring; for example, it is better to leave a duplicated function than to aggressively delete code and replace it with a placeholder.
+    Your proposed modifications must be specific and detailed, and must avoid any large-scale refactoring; that is NOT your job. This constraint is critical to prevent endless rework loops.
+    Pay close attention to any placeholders, comments, or notes (e.g., "to-do," "unchanged," "same as original") that indicate a section of code is incomplete. These areas are your primary focus for completion.
+    This assistant may be called repeatedly, and the caller expects code completeness to be reached quickly. There is **zero** tolerance for implementations that leave comments or placeholders behind, or for any comment along the lines of "replace with your actual...".
-    There is only one exception that you can treat the code section is the same as the original file:
+    There is only one exception that you may leave as it is:
     There is a system syntax to denote large, sequential blocks of code that are not being modified: `#[unchanged_section]|||`
     e.g. #[unchanged_section]|/app/main.py|10|24
@@ -33,7 +37,7 @@
     - `original_question`: The user's initial request.
     - `execution_plan`: The step-by-step plan that was previously generated.
     - `final_code_changes`: The list of final code files and their content that
-      resulted from the execution of the plan.
+      resulted from the execution of the previous plans.
     - `original_files`: The original, unmodified files for context.
     ---
@@ -46,13 +50,13 @@
     * **When to use:** Choose this if the `final_code_changes` fully and correctly address the `original_question` and adhere to the `execution_plan`. The
-      generated code should be bug-free, well-structured, no placeholder, not partial code, and **COMPLETE**.
+      generated code should be bug-free, well-structured, free of placeholders, not partial, and fully **COMPLETE**.
     ### `decision='modify'`
     #### **When to Use**
-    You must evaluate `final_code_changes` to determine whether the generated code fully satisfies the user's request and correctly implements the intended execution plan.
+    You must evaluate `final_code_changes` to determine whether the generated code is fully implemented and meets the user's original request.
     Choose `decision='modify'` if the `final_code_changes` include comments or indications such as:
@@ -83,7 +87,7 @@
     **Example Plan Breakdown:**
     * **Plan Breakdown:**
-      1. **Complete code in /workspace.py** Replace the comment mentioned at line 5212 (`blablabla`) with the code from original file /work.py from line 142 to 152
+      1. **Complete code in /workspace.py:** Replace the comment mentioned at line 5212 (`blablabla`) with the code from the original file /work.py, lines 142 to 152.

-----

diff --git a/ai-hub/app/core/pipelines/question_decider.py b/ai-hub/app/core/pipelines/question_decider.py
index fadbc84..79b2aa1 100644
--- a/ai-hub/app/core/pipelines/question_decider.py
+++ b/ai-hub/app/core/pipelines/question_decider.py
@@ -153,6 +153,11 @@
     2. **Delete** the original code from the source file in a subsequent step.
     This approach circumvents the single-file limitation and allows for multi-file changes.
+    ### **Best Practices**
+    * **Prioritize Creation and Addition Steps First:** Always place steps that add new code and logic before steps that remove, modify, or refactor existing code. This ordering ensures that you do not accidentally lose functionality during a change.
+    * **Be Conservative with Deletions:** Avoid deleting large blocks of code unless you are absolutely certain they are no longer needed. Mass deletion is risky and is often a sign of an incomplete understanding of the codebase.
+    * **Consolidate Gradually:** While code consolidation is a good goal, it is best done in small, incremental steps. An overly aggressive approach is difficult to review and may introduce unexpected bugs; a gradual, measured approach is more likely to be accepted and to result in a more stable codebase.
     ### `decision='files'`
     When more files are needed to fulfill the user's request, use this decision to retrieve them. This decision is suitable for a subset of files.
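To make the ordering guidance above concrete, here is a hedged sketch of a two-step execution plan that adds the relocated code before deleting it from its source file. The five keys mirror the `required_keys` validated by `_parse_input_data` in the `code_change.py` diff that follows; the paths, instructions, `action` values, and the exact shape of the `original_files`/`updated_files` entries are assumptions for illustration only.

```python
# Hypothetical two-step plan: the creation/addition step comes first and the
# deletion step follows, so no functionality is lost if a later round must re-run.
EXAMPLE_EXECUTION_PLAN = [
    {
        "file_path": "/app/utils/formatting.py",
        "change_instruction": "Add the format_timestamp helper, copied verbatim "
                              "from /app/main.py lines 40-58, to this new module.",
        "original_files": ["/app/main.py"],
        "updated_files": ["/app/utils/formatting.py"],
        "action": "modify",  # assumed action value
    },
    {
        "file_path": "/app/main.py",
        "change_instruction": "Delete the now-duplicated format_timestamp helper "
                              "and import it from /app/utils/formatting.py instead.",
        "original_files": ["/app/main.py"],
        "updated_files": ["/app/main.py"],
        "action": "modify",  # assumed action value
    },
]
```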
diff --git a/ai-hub/app/core/services/utils/code_change.py b/ai-hub/app/core/services/utils/code_change.py
index fc73b9f..c6b4569 100644
--- a/ai-hub/app/core/services/utils/code_change.py
+++ b/ai-hub/app/core/services/utils/code_change.py
@@ -35,10 +35,12 @@
         self.code_changer = CodeRagCodeChanger()
         self.code_reviewer = CodeReviewer()
-        self.parsed_data: List[Dict[str, Any]] = []
+        self.current_execution_plans: List[Dict[str, Any]] = []
+        self.history_plans: List[Dict[str, Any]] = []
         self.updated_files: Dict[str, Dict[str, str]] = {}
         self.original_files: Dict[str, str] = {}
         self.last_step_index: int = -1
+        self.max_round = 3
         try:
             self._parse_input_data()
             self._preload_original_files(request_id=request_id)
@@ -52,23 +54,24 @@
         """
         cleaned_input = re.sub(r"^```json\s*|\s*```$", "", self.input_data.strip(), flags=re.DOTALL)
-        parsed_data = json.loads(cleaned_input)
-        if not isinstance(parsed_data, list):
+        current_execution_plans = json.loads(cleaned_input)
+        if not isinstance(current_execution_plans, list):
             raise ValueError("Input is not a JSON array.")
         required_keys = ["file_path", "change_instruction", "original_files", "updated_files", "action"]
-        for item in parsed_data:
+        for item in current_execution_plans:
             if not all(key in item for key in required_keys):
                 raise ValueError(f"An item is missing required keys. Found: {list(item.keys())}, Required: {required_keys}")
-        self.parsed_data = parsed_data
+        self.current_execution_plans = current_execution_plans
+        self.history_plans.append(current_execution_plans)

     def _preload_original_files(self, request_id: uuid.UUID) -> None:
         """
         Fetches and caches the content of all required original files.
         """
         unique_file_paths = set()
-        for item in self.parsed_data:
+        for item in self.current_execution_plans:
             file_paths = item.get("original_files", [])
             for path in file_paths:
                 unique_file_paths.add(path)
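As a quick standalone illustration of the parsing path in the hunk above: the planner output may arrive wrapped in a Markdown JSON code fence, which the `re.sub` call strips before `json.loads` and the `required_keys` check run. The payload below is hypothetical, and the fence is assembled from a variable purely to keep the snippet readable inside documentation.

```python
import json
import re

# Hypothetical planner output wrapped in a Markdown JSON fence.
FENCE = "```"
PLAN_BODY = """[
  {
    "file_path": "/app/main.py",
    "change_instruction": "Rename the helper and update its docstring.",
    "original_files": ["/app/main.py"],
    "updated_files": ["/app/main.py"],
    "action": "modify"
  }
]"""
raw_input = f"{FENCE}json\n{PLAN_BODY}\n{FENCE}"

# Same cleaning step as _parse_input_data: strip the opening and closing fence.
cleaned = re.sub(r"^```json\s*|\s*```$", "", raw_input.strip(), flags=re.DOTALL)

plans = json.loads(cleaned)
required_keys = ["file_path", "change_instruction", "original_files", "updated_files", "action"]
assert all(key in item for item in plans for key in required_keys)
```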
@@ -138,7 +141,7 @@
         ]
         # # Add each change instruction as a numbered list item.
-        # for i, data in enumerate(self.parsed_data):
+        # for i, data in enumerate(self.current_execution_plans):
         #     # Use f-string to create numbered list items with proper indentation.
         #     steps_content.append(f"{i+1}. {data['change_instruction']}")
@@ -146,13 +149,13 @@
         client_log: Dict[str, Any] = {
             "type": "code_change",
             "content": title,
-            "steps": self.parsed_data,
+            "steps": self.current_execution_plans,
             "reasoning": self.reasoning,
             "done": False,
         }
         await websocket.send_text(json.dumps(client_log))

-    async def _post_process(self) -> Dict[str, Dict[str, str]]:
+    async def _post_process(self, websocket: WebSocket) -> None:
         result = {}
         # Regex to find and extract content from a Markdown code block
@@ -178,18 +181,71 @@
                 "new": cleaned_content,
                 "reasoning": detail.get("reasoning", "")
             }
-        return result
+
+        # Send the final processed changes to the client.
+        payload = json.dumps({
+            "type": "code_change",
+            "code_changes": result,
+            "content": "Completed all requested code changes.",
+            "done": True
+        })
+        logger.info(f"Sending code change response to client: {payload}")
+        await websocket.send_text(payload)

     async def _review_changes(self, final_code_changes: List[Dict[str, Any]]) -> Tuple[str, str, str]:
         with dspy.context(lm=self.llm_provider):
             decision, reasoning, answer = await self.code_reviewer.forward(
                 original_question=self.original_question,
-                execution_plan=self.input_data,
+                execution_plan=json.dumps(self.history_plans),
                 final_code_changes=final_code_changes,
                 original_files=[{"file_path": k, "content": v} for k, v in self.original_files.items()]
             )
         return decision, reasoning, answer

+    async def _inline_code_replacement(self, input_text: str) -> str:
+        """
+        Replaces `#[unchanged_section]|<file_path>|<start>|<end>` placeholders with the
+        corresponding lines from the self.original_files cache.
+        """
+        # Regex matching the placeholder format: #[unchanged_section]|<file_path>|<start>|<end>
+        pattern = re.compile(r'#\[unchanged_section\]\|(.*?)\|(\d+)\|(\d+)')
+
+        # Process the generated content line by line.
+        lines = input_text.splitlines()
+        updated_lines = []
+
+        for line in lines:
+            match = pattern.search(line)
+            if match:
+                file_path, start_line_str, end_line_str = match.groups()
+                start_line = int(start_line_str)
+                end_line = int(end_line_str)
+
+                # Ensure start and end lines are in the correct order.
+                if start_line > end_line:
+                    start_line, end_line = end_line, start_line
+
+                file_content_from_dict = self.original_files.get(file_path)
+
+                if file_content_from_dict is not None:
+                    content_lines = file_content_from_dict.splitlines()
+
+                    # Convert the 1-based, inclusive line range to 0-based slice indices.
+                    start_idx = start_line - 1
+                    end_idx = end_line
+
+                    if 0 <= start_idx < len(content_lines) and end_idx <= len(content_lines):
+                        section = content_lines[start_idx:end_idx]
+                        updated_lines.append("\n".join(section))
+                    else:
+                        updated_lines.append(f"# Error: Lines {start_line}-{end_line} are out of bounds for {file_path}")
+                else:
+                    updated_lines.append(f"# Error: File path {file_path} not found in original_files cache")
+            else:
+                updated_lines.append(line)
+
+        return "\n".join(updated_lines)

     async def process(self, websocket: WebSocket, round :int = 0 ,title:str = "**AI-Generated Execution Plan:**") -> Dict[str, Dict[str, str]]:
         """
         Executes all code change instructions in sequence.
@@ -198,7 +254,7 @@
             A dictionary of all updated files with their content and reasoning.
         """
         await self._handle_intermediate_chat_message(websocket, title)
-        for item in self.parsed_data:
+        for item in self.current_execution_plans:
             action = item.get("action")
             filepath = item.get("file_path")
             reasoning = ""
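The `_inline_code_replacement` method added above is the counterpart to the `#[unchanged_section]` placeholder from the prompts: it swaps each marker for the referenced lines of the cached original file. Below is a minimal standalone sketch of that expansion; the file path, line numbers, and contents are hypothetical.

```python
import re

# Cached originals, as _preload_original_files would store them in self.original_files.
original_files = {
    "/app/main.py": "\n".join(f"line {n} of /app/main.py" for n in range(1, 31)),
}

# Generated content in which lines 10-24 of the original file were left untouched.
generated = "\n".join([
    "def handler():",
    "#[unchanged_section]|/app/main.py|10|24",
    "    return 'done'",
])

pattern = re.compile(r"#\[unchanged_section\]\|(.*?)\|(\d+)\|(\d+)")
expanded = []
for line in generated.splitlines():
    match = pattern.search(line)
    if match:
        path, start, end = match.group(1), int(match.group(2)), int(match.group(3))
        # The placeholder's 1-based, inclusive range maps to a 0-based slice.
        expanded.append("\n".join(original_files[path].splitlines()[start - 1:end]))
    else:
        expanded.append(line)

print("\n".join(expanded))  # the marker is replaced by original lines 10-24
```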
@@ -218,13 +274,15 @@
             else:
                 # extract the target path.
                 response = await self._process_ai_question(item)
+                response["content"] = await self._inline_code_replacement(response.pop("content"))
                 self.updated_files[filepath] = response
                 reasoning = response.get("reasoning", "")
             # dspy.inspect_history(n=1)
             if reasoning:
                 await self._handle_thinking_log(websocket, reasoning)
-        if round <=5:
+
+        if round [...prev, {
+          type: "local",
+          message: `Iterating the directory: ${dirHandle.name} to list files...`,
+        }]);
       async function walkDirectory(handle, path = '') {
         for await (const entry of handle.values()) {
           const entryPath = `${path}/${entry.name}`;
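The new guard that replaces `if round <=5:` is not fully visible in the hunk above, and the remaining frontend snippet belongs to a separate file whose header is not shown. Given the `self.max_round = 3` attribute introduced in `code_change.py`, the new guard presumably bounds the review/rework loop. Here is a self-contained toy sketch of that bounded-retry pattern, with the review call stubbed out; nothing below is taken from the diff itself.

```python
import asyncio

MAX_ROUND = 3  # mirrors the new self.max_round attribute


async def fake_review(round_: int) -> str:
    # Stand-in for the real reviewer call; it accepts the changes on the second pass.
    return "modify" if round_ < 1 else "accept"


async def run_with_review(round_: int = 0) -> str:
    """Toy bounded loop: retry while the reviewer asks for changes, up to MAX_ROUND."""
    decision = await fake_review(round_)
    if decision == "modify" and round_ < MAX_ROUND:
        return await run_with_review(round_ + 1)
    return f"finished after {round_ + 1} round(s) with decision={decision!r}"


print(asyncio.run(run_with_review()))
```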