from typing import List, Dict, Any, Optional
from sqlalchemy.orm import Session
from app.db import models
from app.core.skills.base import BaseSkill
import logging
import time
import os
from app.core.tools.registry import tool_registry
import time
from app.core._regex import (
SKILL_CONFIG_JSON, SKILL_DESC_OVERRIDE,
SKILL_PARAM_TABLE, SKILL_BASH_LOGIC
)
import json
import yaml
import litellm
import shlex
logger = logging.getLogger(__name__)
class ToolService:
    """
    Orchestrates AI tools (Skills) available to users.

    Handles discovery (in-process local skills, `tool_registry` plugins, and
    VFS/filesystem skills via `fs_loader`), permission checks (per-session
    skill allow-lists and mesh-node attachment guards), and execution routing
    (direct skill calls, lightweight plugin tasks, or SubAgent runs for
    system skills).
    """
def __init__(self, services: Any = None, local_skills: List[BaseSkill] = []):
self._services = services
self._local_skills = {s.name: s for s in local_skills}
tool_registry.load_plugins()
def get_available_tools(self, db: Session, user_id: str, feature: str = None, session_id: int = None) -> List[Dict[str, Any]]:
allowed_skill_names = self._get_allowed_skills(db, session_id)
# 1. Local Skills
local_skills = [s.to_tool_definition() for s in self._local_skills.values()
if not feature or feature in getattr(s, "features", ["chat"])]
if allowed_skill_names is not None:
local_skills = [t for t in local_skills if t["function"]["name"] in allowed_skill_names]
# 2. VFS/FS Skills
from app.core.skills.fs_loader import fs_loader
db_skills = []
max_md_len = self._resolve_model_max_len(db, user_id)
for fs_skill in fs_loader.get_all_skills():
if not fs_skill.get("is_enabled", True): continue
if not (fs_skill.get("is_system") or fs_skill.get("owner_id") == user_id): continue
if feature and feature not in fs_skill.get("features", ["chat"]): continue
if allowed_skill_names is not None and fs_skill["name"] not in allowed_skill_names: continue
if any(t["function"]["name"] == fs_skill["name"] for t in local_skills): continue
db_skills.append(self._parse_vfs_skill(fs_skill))
return local_skills + db_skills
def _get_allowed_skills(self, db: Session, session_id: int) -> Optional[set]:
if not session_id or not db: return None
session_obj = db.query(models.Session).filter(models.Session.id == session_id).first()
if not session_obj or not getattr(session_obj, "restrict_skills", False): return None
allowed = set()
if session_obj.allowed_skill_names: allowed.update(session_obj.allowed_skill_names)
if getattr(session_obj, "skills", None): allowed.update(s.name for s in session_obj.skills)
return allowed
def _resolve_model_max_len(self, db: Session, user_id: str) -> int:
from app.config import settings
m_name = settings.ACTIVE_LLM_PROVIDER
try:
user = db.query(models.User).filter(models.User.id == user_id).first() if db else None
if user and user.preferences:
llm_prefs = user.preferences.get("llm", {})
m_name = llm_prefs.get("model") or user.preferences.get("llm_model", m_name)
if "/" not in m_name:
p = llm_prefs.get("active_provider") or user.preferences.get("llm_provider", settings.ACTIVE_LLM_PROVIDER)
m_name = f"{p}/{m_name}"
info = litellm.get_model_info(m_name)
if info:
max_t = info.get("max_input_tokens", 8192)
return max(min(int(max_t * 4 * 0.05), 40000), 1000)
except Exception as e:
logger.warning(f"Tool schema truncation fail: {e}")
return 1000
def _parse_vfs_skill(self, fs_skill: dict) -> dict:
name = fs_skill["name"]
description = fs_skill.get("description", "")
parameters = {"type": "object", "properties": {}, "required": []}
# Binary/VFS normalization
class _Obj:
def __init__(self, d):
for k, v in d.items(): setattr(self, k, v)
files = [_Obj(f) for f in fs_skill.get("files", [])]
skill_md = next((f for f in files if f.file_path == "SKILL.md"), None)
if skill_md and skill_md.content:
content = str(skill_md.content)
exec_file = next((f.file_path for f in files if f.file_path.endswith((".sh", ".py")) or "run." in f.file_path), "<executable>")
exec_cmd = f"bash .skills/{name}/{exec_file}" if exec_file.endswith(".sh") else f"python3 .skills/{name}/{exec_file}" if exec_file.endswith(".py") else f".skills/{name}/{exec_file}"
description += f"\n\n[Native VFS Skill - Execute via: `{exec_cmd}`]\n{content}"
# YAML Frontmatter
if content.startswith("---"):
try:
parts = content.split("---", 2)
if len(parts) >= 3:
fm = yaml.safe_load(parts[1])
parameters = fm.get("config", {}).get("parameters", parameters)
except: pass
# Regex Parsers
if not parameters or not parameters.get("properties"):
mig_match = SKILL_CONFIG_JSON.search(content)
if mig_match:
try: parameters = json.loads(mig_match.group(1).strip())
except: pass
if not parameters or not parameters.get("properties"):
desc_match = SKILL_DESC_OVERRIDE.search(content)
if desc_match:
description = f"{desc_match.group(1).strip()}\n\n[Native VFS Skill - Execute via: `{exec_cmd}`]\n{content}"
table_match = SKILL_PARAM_TABLE.search(content)
if table_match:
parameters = {"type": "object", "properties": {}, "required": []}
for row in table_match.group(1).strip().split('\n'):
cols = [c.strip() for c in row.split('|')][1:-1]
if len(cols) >= 4:
p_n = cols[0].replace('`', '').strip()
parameters["properties"][p_n] = {"type": cols[1].strip(), "description": cols[2].strip()}
if cols[3].strip().lower() in ['yes', 'true', '1', 'y']:
parameters["required"].append(p_n)
# Inject Node Selector
if "node_id" not in parameters.get("properties", {}):
if "properties" not in parameters: parameters["properties"] = {}
parameters["properties"]["node_id"] = {
"type": "string",
"description": "Mesh node ID for execution. Leave empty to use session default."
}
return {"type": "function", "function": {"name": name, "description": description, "parameters": parameters}}
    async def call_tool(self, tool_name: str, arguments: Dict[str, Any], db: Session = None, user_id: str = None, session_id: str = None, session_db_id: int = None, on_event = None, provider_name: str = None) -> Any:
        """
        Executes a registered skill.

        Resolution order:
          0. Validate/auto-resolve the target mesh node against the session's
             attached nodes (security guard; may mutate ``arguments``).
          1. Local in-process skills registered at construction.
          2. Lightweight ``tool_registry`` plugins (called directly, no
             SubAgent loop).
          3. VFS/filesystem skills, routed to ``_execute_system_skill``.

        Args:
            tool_name: Name of the tool/skill to invoke.
            arguments: Tool arguments; ``node_id`` may be rewritten in place.
            db: Optional DB session for the session/node lookup.
            user_id: Acting user's id (passed through to sub-execution).
            session_id: External (string) session identifier.
            session_db_id: Numeric DB id of the session row.
            on_event: Optional progress-event callback.
            provider_name: Optional LLM provider override for sub-agents.

        Returns:
            The tool result, or a ``{"success": False, "error": ...}`` dict.
        """
        # --- Node Auto-Resolution & Security Guard ---
        node_id = arguments.get("node_id")
        node_ids = arguments.get("node_ids")
        if db and session_db_id:
            session_meta = db.query(models.Session).filter(models.Session.id == session_db_id).first()
            if session_meta:
                attached = session_meta.attached_node_ids or []
                # Default to the first attached node when the model gave none.
                if not node_id and not node_ids and attached:
                    node_id = attached[0]
                    arguments["node_id"] = node_id
                # "hub"/"server"/"local" always refer to this host.
                allowed_ids = attached + ["hub", "server", "local"]
                if node_id and node_id not in allowed_ids:
                    # Fuzzy match fallback (e.g. 'test node1' -> 'test-node-1')
                    fuzzy_nid = str(node_id).lower().replace(" ", "").replace("-", "").replace("_", "")
                    for aid in allowed_ids:
                        if aid.lower().replace("-", "").replace("_", "") == fuzzy_nid:
                            node_id = aid
                            arguments["node_id"] = aid
                            break
                    # Still unresolved after fuzzy matching: refuse execution.
                    if node_id not in allowed_ids:
                        return {"success": False, "error": f"Node '{node_id}' is NOT attached or doesn't exist. Available nodes: {attached}"}
                if node_ids:
                    # Multi-node dispatch: every target must be attached
                    # (no fuzzy matching for lists).
                    illegal = [nid for nid in node_ids if nid not in allowed_ids]
                    if illegal:
                        return {"success": False, "error": f"Nodes {illegal} are NOT attached to this session. Available nodes: {attached}"}
        # 1. Try local/native skill first
        if tool_name in self._local_skills:
            skill = self._local_skills[tool_name]
            result = await skill.execute(**arguments)
            return result.dict()
        # 2. Check the tool_registry for lightweight plugins (e.g. read_skill_artifact)
        # These are system agents that DO NOT need a full SubAgent loop — their prepare_task
        # returns a synchronous function we can call directly.
        plugin = tool_registry.get_plugin(tool_name)
        if plugin:
            orchestrator = getattr(self._services, "orchestrator", None)
            context = {
                "db": db,
                "user_id": user_id,
                "session_id": session_id or "__fs_explorer__",
                "node_id": node_id,
                "node_ids": node_ids,
                "services": self._services,
                "orchestrator": orchestrator,
                "assistant": orchestrator.assistant if orchestrator else None,
                "on_event": on_event
            }
            task_fn, task_args = plugin.prepare_task(arguments, context)
            if not task_fn:
                return task_args  # error dict
            try:
                import asyncio
                if asyncio.iscoroutinefunction(task_fn):
                    res = await task_fn(**task_args)
                else:
                    # BLOCKING CALL: Move to thread to avoid freezing the event loop
                    res = await asyncio.to_thread(task_fn, **task_args)
                # M6: Post-processing for Binary Artifacts (Screenshots, etc.)
                if isinstance(res, dict):
                    res = self._process_binary_artifacts(tool_name, res, session_id, arguments, orchestrator.assistant if orchestrator else None)
                return res
            except Exception as e:
                logger.exception(f"Plugin '{tool_name}' execution failed: {e}")
                return {"success": False, "error": str(e)}
        # 3. Handle System / FS Skills (full SubAgent or Bash)
        from app.core.skills.fs_loader import fs_loader
        all_fs_skills = fs_loader.get_all_skills()
        for fs_skill in all_fs_skills:
            if fs_skill.get("name") == tool_name:
                # Wrap plain dicts so downstream code can use attribute access.
                class _DictObj:
                    def __init__(self, d):
                        for k, v in d.items():
                            setattr(self, k, v)
                fs_skill["files"] = [_DictObj(f) for f in fs_skill.get("files", [])]
                db_skill_mock = _DictObj(fs_skill)
                return await self._execute_system_skill(db_skill_mock, arguments, user_id=user_id, db=db, session_id=session_id, session_db_id=session_db_id, on_event=on_event, provider_name=provider_name)
        logger.error(f"Tool '{tool_name}' not found or handled yet.")
        return {"success": False, "error": "Tool not found"}
    async def _execute_system_skill(self, skill: Any, args: Dict[str, Any], user_id: str = None, db: Session = None, session_id: str = None, session_db_id: int = None, on_event = None, provider_name: str = None) -> Any:
        """Routes FS skill execution to a stateful SubAgent or Dynamic Plugin.

        When no registry plugin matches ``skill.name``, an ad-hoc
        ``DynamicBashPlugin`` is built from the skill's SKILL.md: either an
        "Execution Logic" bash block (with ``${param}`` substitution) or an
        auto-bridged executable file. The resulting task is then run through
        a SubAgent (optionally AI-assisted when an LLM provider resolves).

        Args:
            skill: Skill object accessed by attribute (``.name``, ``.files``).
            args: Invocation arguments; node targets were validated upstream.
            user_id/db/session_id/session_db_id: Execution context.
            on_event: Optional progress-event callback.
            provider_name: Optional LLM provider override.

        Returns:
            The SubAgent result, or a ``{"success": False, ...}`` error dict.
        """
        from app.core.services.sub_agent import SubAgent
        from app.core.providers.factory import get_llm_provider
        # Node IDs are already validated and resolved from step 0 in call_tool.
        node_id = args.get("node_id")
        node_ids = args.get("node_ids")
        # --- Standard Preparation ---
        llm_provider = None
        orchestrator = getattr(self._services, "orchestrator", None)
        if not orchestrator:
            return {"success": False, "error": "Orchestrator not available"}
        assistant = orchestrator.assistant
        # M3: Resolve session_id from either arguments OR the passed session_id context
        # (AI might use placeholders like 'current' which we resolve here)
        session_id_arg = args.get("session_id")
        if not session_id_arg or session_id_arg == "current":
            resolved_sid = session_id or "__fs_explorer__"
        else:
            resolved_sid = session_id_arg
        llm_provider = self._resolve_llm_for_sub_agent(db, user_id, provider_name)
        if llm_provider:
            logger.info(f"[ToolService] AI Sub-Agent enabled for {skill.name}")
        # Define the task function and arguments for the SubAgent
        task_fn = None
        task_args = {}
        plugin = tool_registry.get_plugin(skill.name)
        if not plugin:
            # Check if this is a Dynamic Bash Skill
            bash_logic = None
            skill_md_file = next((f for f in skill.files if f.file_path == "SKILL.md"), None) if getattr(skill, "files", None) else None
            if skill_md_file and skill_md_file.content:
                import re
                # Broadened regex: Allows 3-6 hashes, any title containing "Execution Logic", and handles files ending without newlines.
                bash_match = re.search(r"#{3,6}\s*Execution Logic.*?```bash\s*\n(.*?)(?:```|\Z)", skill_md_file.content, re.DOTALL | re.IGNORECASE)
                if bash_match:
                    bash_logic = bash_match.group(1).strip()
            if not bash_logic:
                # No inline bash block: fall back to the first executable-looking file.
                exec_file = "<executable>"
                for f in getattr(skill, "files", []):
                    if f.file_path.endswith(".sh") or f.file_path.endswith(".py") or "run." in f.file_path:
                        exec_file = f.file_path
                        break
                exec_cmd = f"bash .skills/{skill.name}/{exec_file}" if exec_file.endswith(".sh") else f"python3 .skills/{skill.name}/{exec_file}" if exec_file.endswith(".py") else f".skills/{skill.name}/{exec_file}"
            # Ad-hoc plugin shim; its methods close over bash_logic / exec_cmd
            # from this enclosing scope (exec_cmd only exists — and is only
            # used — when bash_logic is falsy).
            class DynamicBashPlugin:
                name = skill.name
                retries = 0
                def prepare_task(self, invoke_args, invoke_context):
                    """Build (task_fn, task_args) for hub / swarm / single-node dispatch."""
                    import shlex
                    if bash_logic:
                        # Template substitution: ${param} -> shell-quoted value.
                        cmd = bash_logic
                        for k, v in invoke_args.items():
                            cmd = cmd.replace(f"${{{k}}}", shlex.quote(str(v)))
                    else:
                        # Auto-bridging fallback: construct command with env vars and positional args
                        safe_args = {k: v for k, v in invoke_args.items() if k != "timeout" and k != "node_id" and k != "node_ids"}
                        bash_env = " ".join([f'{k}={shlex.quote(str(v))}' for k, v in safe_args.items()])
                        bash_args_str = " ".join([shlex.quote(str(v)) for v in safe_args.values()])
                        cmd = f"{bash_env} {exec_cmd} {bash_args_str}".strip()
                    timeout = int(invoke_args.get("timeout", 60))
                    # Context values win over invocation args for targeting.
                    node_id = invoke_context.get("node_id", invoke_args.get("node_id"))
                    node_ids = invoke_context.get("node_ids", invoke_args.get("node_ids"))
                    resolved_sid = invoke_context.get("session_id")
                    assistant = invoke_context.get("assistant")
                    services = invoke_context.get("services")
                    if node_id in ["hub", "server", "local"] or (node_ids and any(nid in ["hub", "server", "local"] for nid in node_ids)):
                        # Execute directly on this host via subprocess.
                        def _hub_command(**kwargs):
                            import subprocess, os
                            cwd = os.getcwd()
                            # Prefer the session's mirrored workspace as CWD when available.
                            if kwargs.get("resolved_sid") and getattr(services, "orchestrator", None):
                                try: cwd = services.orchestrator.mirror.get_workspace_path(kwargs.get("resolved_sid"))
                                except: pass
                            try:
                                proc = subprocess.run(kwargs.get("cmd"), shell=True, capture_output=True, text=True, timeout=kwargs.get("timeout"), cwd=cwd)
                                return {"status": "SUCCESS" if proc.returncode == 0 else "FAILED", "stdout": proc.stdout, "stderr": proc.stderr, "exit_code": proc.returncode, "node_id": "hub"}
                            except subprocess.TimeoutExpired as e:
                                return {"status": "TIMEOUT", "stdout": e.stdout or "", "stderr": e.stderr or "", "error": "Command timed out"}
                            except Exception as e:
                                return {"status": "ERROR", "error": str(e)}
                        return _hub_command, {"cmd": cmd, "timeout": timeout, "resolved_sid": resolved_sid}
                    elif node_ids and isinstance(node_ids, list):
                        # Fan the command out to several mesh nodes.
                        return assistant.dispatch_swarm, {"node_ids": node_ids, "cmd": cmd, "timeout": timeout, "session_id": resolved_sid, "no_abort": False}
                    elif node_id:
                        # Single remote node dispatch.
                        return assistant.dispatch_single, {"node_id": node_id, "cmd": cmd, "timeout": timeout, "session_id": resolved_sid, "no_abort": False}
                    return None, {"success": False, "error": f"Please map a target node_id to execute this tool natively."}
            plugin = DynamicBashPlugin()
        context = {
            "db": db,
            "user_id": user_id,
            "session_id": resolved_sid,
            "node_id": node_id,
            "node_ids": node_ids,
            "assistant": assistant,
            "orchestrator": orchestrator,
            "services": self._services,
            "on_event": on_event
        }
        task_fn, task_args = plugin.prepare_task(args, context)
        if not task_fn:
            return task_args  # error dict returned by prepare_task
        try:
            if task_fn:
                # Create and run the SubAgent (potentially AI-powered)
                sub_agent = SubAgent(
                    name=f"{skill.name}_{node_id or 'swarm'}",
                    task_fn=task_fn,
                    args=task_args,
                    retries=plugin.retries,
                    llm_provider=llm_provider,
                    assistant=assistant,
                    on_event=on_event
                )
                res = await sub_agent.run()
                # Standardize output for AI:
                # If it's a string (our new Intelligence Report), pass it through directly.
                # If it's a dict, only wrap as failure if a non-None error exists.
                if isinstance(res, dict) and res.get("error"):
                    return {"success": False, "error": res["error"], "sub_agent_status": sub_agent.status}
                # M6: Post-processing for Binary Artifacts (Screenshots, etc.)
                if isinstance(res, dict):
                    res = self._process_binary_artifacts(skill.name, res, resolved_sid, args, assistant)
                logger.info(f"[ToolService] System skill '{skill.name}' completed (Status: {sub_agent.status}).")
                return res
        except Exception as e:
            logger.exception(f"System skill execution failed: {e}")
            return {"success": False, "error": str(e)}
        return {"success": False, "error": "Skill execution logic not found"}
def _process_binary_artifacts(self, skill_name: str, res: Dict[str, Any], resolved_sid: str, args: Dict[str, Any], assistant: Any) -> Dict[str, Any]:
"""Post-processing for Binary Artifacts (Screenshots, etc.)"""
# Unconditionally prevent binary data from leaking into the JSON serializer
screenshot_bits = res.pop("_screenshot_bytes", None)
if skill_name == "browser_automation_agent" and assistant:
# Organise browser data by session for better UX
if resolved_sid and resolved_sid != "__fs_explorer__":
try:
abs_workspace = assistant.mirror.get_workspace_path(resolved_sid)
# M6: Use .browser_data (ignored from node sync)
base_dir = os.path.join(abs_workspace, ".browser_data")
os.makedirs(base_dir, exist_ok=True)
timestamp = int(time.time())
action = args.get("action", "unknown").lower()
# Clean filename for the image: {timestamp}_{action}.png
ss_filename = f"{timestamp}_{action}.png"
# Save Screenshot if available
if screenshot_bits:
ss_path = os.path.join(base_dir, ss_filename)
with open(ss_path, "wb") as f:
f.write(screenshot_bits)
res["screenshot_url"] = f"/.browser_data/{resolved_sid}/{ss_filename}"
res["_visual_feedback"] = f"Action screenshot captured: {res['screenshot_url']}"
# Save Metadata/A11y into a hidden or specific sub-folder if needed,
# but keep images in the root of the session for quick gallery view.
action_dir = os.path.join(base_dir, ".metadata", f"{timestamp}_{action}")
os.makedirs(action_dir, exist_ok=True)
# Save Metadata/Result for easy debugging in file explorer
import json
meta = {
"timestamp": timestamp,
"action": action,
"url": res.get("url"),
"title": res.get("title"),
"success": res.get("success"),
"error": res.get("error"),
"eval_result": res.get("eval_result")
}
with open(os.path.join(action_dir, "metadata.json"), "w") as f:
json.dump(meta, f, indent=2)
# Optional: Save A11y summary for quick viewing
if "a11y_summary" in res:
with open(os.path.join(action_dir, "a11y_summary.txt"), "w") as f:
f.write(res["a11y_summary"])
logger.info(f"[ToolService] Browser artifacts saved to: {action_dir}")
except Exception as sse:
logger.warning(f"Failed to persist browser data to workspace: {sse}")
return res
def _resolve_llm_for_sub_agent(self, db: Session, user_id: str, provider_name: str) -> Optional[Any]:
if not db or not user_id or not self._services: return None
user = db.query(models.User).filter(models.User.id == user_id).first()
if not user: return None
provider, _ = self._services.preference_service.resolve_llm_provider(db, user, provider_name)
return provider