# cortex-hub / ai-hub / app / api / routes / mcp.py
"""
MCP (Model Context Protocol) Server Route — Streamable HTTP + Legacy SSE Transport

Supports:
  MCP spec 2025-11-25 — Streamable HTTP (primary, recommended)
  MCP spec 2024-11-05 — HTTP+SSE (legacy, backwards-compat)

Endpoints (mounted under /api/v1/mcp/*):
  POST /mcp/sse     — Streamable HTTP: JSON-RPC in, JSON response out
  POST /mcp/        — Same, aliased for clients using the base path
  GET  /mcp/sse     — Legacy SSE stream (sends endpoint event)
  POST /mcp/messages — Legacy SSE message handler

Discovery:
  GET  /.well-known/mcp/manifest.json  (mounted in app.py)
"""


import asyncio
import json
import uuid
import logging
import jwt
from typing import Optional, List, Annotated
from fastapi import APIRouter, HTTPException, Request, Query, Header
from fastapi.responses import JSONResponse, StreamingResponse

from app.api.dependencies import ServiceContainer
from app.config import settings

# Module-level logger shared by every handler in this file.
logger = logging.getLogger(__name__)

# Protocol version advertised in the `initialize` result and in the
# `MCP-Protocol-Version` response header of the Streamable HTTP endpoint.
MCP_VERSION = "2025-11-25"  # Latest MCP specification version

# ─── In-process SSE session registry ─────────────────────────────────────────
# Maps session_id → asyncio.Queue of JSON-serializable dicts
# An entry is added when a client opens GET /sse and removed when that
# stream's event generator exits. POST /messages looks the queue up by
# session_id to deliver JSON-RPC responses over the open stream.
# NOTE(review): being a plain module-level dict, this is only shared within a
# single Python process — confirm the deployment does not spread one client's
# GET /sse and POST /messages across different workers.
_sse_sessions: dict[str, asyncio.Queue] = {}


def create_mcp_router(services: ServiceContainer) -> APIRouter:
    """
    Build the router that exposes the MCP transports.

    Routes registered (relative to wherever the router is mounted):
      GET  /sse       — legacy SSE stream; the first event announces the messages URL
      POST /sse, /    — Streamable HTTP: one JSON-RPC request (or batch) per call
      POST /messages  — legacy SSE message handler; results arrive on the SSE stream

    Args:
        services: Service container. ``auth_service`` is used here to verify OIDC
            tokens; the container is also passed through to the JSON-RPC handlers.

    Returns:
        The configured ``APIRouter``.
    """
    # Function-scope import, matching the local-import style used in this file.
    from fastapi.responses import Response

    router = APIRouter(tags=["MCP"])

    # Browser origins allowed to talk to the MCP endpoints.
    allowed_origins = frozenset({
        "https://ai.jerxie.com",
        "http://localhost:3000",
        "http://localhost:8080",
    })

    # Identity resolved when each legacy SSE session was opened. Keeping it
    # server-side means POST /messages never has to trust a caller-supplied
    # user id for an already-authenticated session.
    session_users: dict[str, Optional[str]] = {}

    # Strong references to in-flight dispatch tasks. The event loop only keeps
    # weak references, so an unreferenced task may be collected mid-execution.
    background_tasks: set = set()

    def _origin_allowed(origin: Optional[str], server_host: str = "") -> bool:
        """
        Return True when the Origin header is absent or exactly matches an allowed origin.

        Exact comparison is deliberate: a prefix test would accept look-alike
        hosts such as ``https://ai.jerxie.com.evil.example``.

        Args:
            origin: Value of the request's Origin header, if any.
            server_host: Optional Host header value; when non-empty, the server's
                own http/https origin is accepted too.
        """
        if not origin:
            return True  # Non-browser clients send no Origin header.
        candidate = origin.strip().rstrip("/").lower()
        if candidate in allowed_origins:
            return True
        host = server_host.strip().lower()
        # NOTE(review): Host is client-supplied, so this same-host allowance is
        # only as trustworthy as the proxy in front of the app.
        return bool(host) and candidate in (f"https://{host}", f"http://{host}")

    def _invalid_request() -> dict:
        """JSON-RPC 2.0 'Invalid Request' error object (id is null by definition)."""
        return {
            "jsonrpc": "2.0",
            "id": None,
            "error": {"code": -32600, "message": "Invalid Request"},
        }

    async def _get_authenticated_user(request: Request, token: Optional[str], db) -> Optional[str]:
        """
        Resolve the caller's user id from the Authorization header or the ``token`` query param.

        A ``Bearer`` Authorization header takes precedence over the query param.
        When OIDC is enabled the token must be a JWT: tokens whose (unverified)
        issuer is ``cortex-hub-internal`` are verified as HS256 against
        ``settings.SECRET_KEY``; anything else is handed to
        ``services.auth_service.verify_id_token``. When OIDC is disabled the token
        is returned as-is and treated as a plain user id.

        Returns:
            The user id, or None when no token was supplied.

        Raises:
            HTTPException: 401 in OIDC mode when the token is not a JWT or fails
                verification.
        """
        auth_header = request.headers.get("Authorization")
        if auth_header and auth_header.startswith("Bearer "):
            # Everything after the scheme is the credential; tolerate extra spaces.
            token = auth_header.partition(" ")[2].strip()

        if not token:
            return None

        # 1. OIDC Mode: Support both OIDC (RS256) and Internal (HS256) JWTs
        if settings.OIDC_ENABLED:
            if "." not in token:
                logger.warning("[MCP] Rejected non-JWT token in OIDC mode.")
                raise HTTPException(
                    status_code=401,
                    detail="Authentication required: Provide a valid JWT.",
                )

            try:
                # Peek at the issuer without trusting it; the signature is
                # verified below before any claim is used.
                unverified = jwt.decode(token, options={"verify_signature": False})
                if unverified.get("iss") == "cortex-hub-internal":
                    decoded = jwt.decode(token, settings.SECRET_KEY, algorithms=["HS256"])
                    return decoded.get("sub")

                # Fallback to OIDC RS256
                user = await services.auth_service.verify_id_token(token, db)
                return user.id
            except Exception as e:
                logger.error(f"[MCP] JWT verification failed: {e}")
                raise HTTPException(status_code=401, detail=f"Invalid token: {e}") from e

        # 2. Legacy/Bootstrap Mode: Accept plain user_id (Identity Claim)
        # This is only active when OIDC is not configured.
        return token

    # ─── SSE Transport — Client Connection ────────────────────────────────────
    @router.get("/sse", summary="MCP SSE Transport Endpoint")
    async def mcp_sse(
        request: Request,
        token: Optional[str] = Query(None),
    ):
        """
        Server-Sent Events (SSE) transport for MCP.
        Supports Bearer token in Authorization header or 'token' query parameter.
        """
        # Origin validation per MCP 2025-11-25. Done first so a rejected request
        # neither touches the database nor leaves an orphaned session behind.
        origin = request.headers.get("origin")
        if not _origin_allowed(origin):
            logger.warning(f"[MCP] Blocked unauthorized origin: {origin}")
            raise HTTPException(status_code=403, detail="Unauthorized Origin")

        from app.db.session import get_db_session
        with get_db_session() as db:
            user_id = await _get_authenticated_user(request, token, db)

        if not user_id:
            # We allow the SSE connection to open even without auth,
            # but actual messages/tools will be rejected.
            logger.info("[MCP] SSE connection opened without initial auth.")

        queue: asyncio.Queue = asyncio.Queue()
        session_id = str(uuid.uuid4())
        _sse_sessions[session_id] = queue
        session_users[session_id] = user_id

        # The URL carries only the session id; the identity stays server-side.
        messages_url = f"{settings.HUB_PUBLIC_URL}/api/v1/mcp/messages?session_id={session_id}"

        async def _event_generator():
            try:
                yield f"event: endpoint\ndata: {messages_url}\n\n"
                while True:
                    msg = await queue.get()
                    yield f"event: message\ndata: {json.dumps(msg)}\n\n"
            finally:
                # Runs when the stream is closed or cancelled.
                _sse_sessions.pop(session_id, None)
                session_users.pop(session_id, None)

        return StreamingResponse(
            _event_generator(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",
                "Access-Control-Allow-Origin": "*",
            },
        )

    # ─── Streamable HTTP Transport (MCP 2025-11-25) ───────────────────────────
    @router.post("/sse")
    @router.post("/", summary="MCP Streamable HTTP Endpoint (Post-only mode)")
    async def mcp_streamable_http(
        request: Request,
        token: Optional[str] = Query(None),
    ):
        """
        One-shot JSON-RPC over HTTP.
        Supports Bearer token in Authorization header or 'token' query parameter.
        """
        # Origin validation per MCP 2025-11-25 (also allows the server's own origin).
        origin = request.headers.get("origin")
        if not _origin_allowed(origin, request.headers.get("host", "")):
            logger.warning(f"[MCP] Rejected request from disallowed origin: {origin}")
            return JSONResponse(
                {"jsonrpc": "2.0", "error": {"code": -32000, "message": "Forbidden origin"}},
                status_code=403,
            )

        from app.db.session import get_db_session
        with get_db_session() as db:
            user_id = await _get_authenticated_user(request, token, db)

        try:
            body = await request.json()
        except Exception:
            raise HTTPException(status_code=400, detail="Invalid JSON body.") from None

        cors_headers = {"Access-Control-Allow-Origin": "*"}
        versioned_headers = {**cors_headers, "MCP-Protocol-Version": MCP_VERSION}

        # Batch requests (JSON array)
        if isinstance(body, list):
            results = []
            for item in body:
                if isinstance(item, dict):
                    results.append(await _handle_single(item, user_id, services))
                else:
                    # A non-object member is an Invalid Request, not a server error.
                    results.append(_invalid_request())
            responses = [r for r in results if r is not None]
            if not responses:
                # Only notifications were sent: acknowledge with an empty body.
                return Response(status_code=202, headers=cors_headers)
            return JSONResponse(responses, headers=versioned_headers)

        if not isinstance(body, dict):
            return JSONResponse(_invalid_request(), status_code=400, headers=versioned_headers)

        # Single request
        response = await _handle_single(body, user_id, services)
        if response is None:  # notification — no id
            # 202 Accepted must carry no body (JSONResponse(None) would send "null").
            return Response(status_code=202, headers=cors_headers)

        # If initialize, attach a session ID (MAY per spec)
        headers = dict(versioned_headers)
        if body.get("method") == "initialize":
            headers["Mcp-Session-Id"] = str(uuid.uuid4())

        return JSONResponse(response, headers=headers)

    # ─── SSE Transport — Message Handler ─────────────────────────────────────
    @router.post("/messages")
    async def mcp_messages(
        request: Request,
        session_id: str = Query(...),
        token: Optional[str] = Query(None),
    ):
        """
        Legacy SSE message handler — receives JSON-RPC 2.0 from a client that
        first established a GET /sse stream, then pushes results over that stream.

        The caller's identity is the one bound to the session when the stream was
        opened. Only when the stream was opened anonymously is this request
        authenticated on its own (Authorization header or 'token' query param),
        using the same verification as the other endpoints.
        """
        queue = _sse_sessions.get(session_id)
        if queue is None:
            raise HTTPException(status_code=404, detail="MCP session not found or expired.")

        try:
            body = await request.json()
        except Exception:
            raise HTTPException(status_code=400, detail="Invalid JSON body.") from None
        if not isinstance(body, dict):
            raise HTTPException(status_code=400, detail="Invalid JSON-RPC request.")

        # Never take the user id from the query string unverified: in OIDC mode
        # that would let any caller impersonate any user.
        user_id = session_users.get(session_id)
        if user_id is None:
            from app.db.session import get_db_session
            with get_db_session() as db:
                user_id = await _get_authenticated_user(request, token, db)

        rpc_id = body.get("id")
        method = body.get("method", "")
        params = body.get("params")
        if params is None:  # absent or explicit JSON null
            params = {}

        logger.info(f"[MCP] [{session_id[:8]}] → {method}")
        task = asyncio.create_task(_dispatch(queue, rpc_id, method, params, user_id, services))
        background_tasks.add(task)
        task.add_done_callback(background_tasks.discard)

        return JSONResponse(
            {"status": "accepted"},
            status_code=202,
            headers={"Access-Control-Allow-Origin": "*"},
        )

    return router


# ─── Single-request handler (used by Streamable HTTP) ────────────────────────

async def _handle_single(body: dict, token: Optional[str], services: ServiceContainer):
    """
    Process one JSON-RPC object for the Streamable HTTP transport.

    Args:
        body: The decoded JSON-RPC message. Anything that is not a JSON object
            is answered with an Invalid Request error instead of raising.
        token: Authenticated user id (or None), forwarded to the method handler.
        services: Service container forwarded to the method handler.

    Returns:
        A JSON-RPC response dict, or None when the message is a notification
        (it has no ``id``), in which case nothing must be sent back.
    """
    if not isinstance(body, dict):
        # JSON-RPC 2.0: a message that is not an object is an Invalid Request,
        # answered with a null id.
        return {
            "jsonrpc": "2.0",
            "id": None,
            "error": {"code": -32600, "message": "Invalid Request"},
        }

    rpc_id = body.get("id")      # None means it's a notification
    method = body.get("method", "")
    params = body.get("params")
    if params is None:           # absent or explicit JSON null
        params = {}

    logger.info(f"[MCP-HTTP] → {method}")
    try:
        result = await _execute(method, params, token, services)
    except Exception as exc:
        logger.exception(f"[MCP-HTTP] Error for '{method}': {exc}")
        if rpc_id is None:
            return None   # notifications never get a response, even on failure
        return {
            "jsonrpc": "2.0",
            "id": rpc_id,
            "error": {"code": -32000, "message": str(exc)},
        }

    if rpc_id is None:
        return None   # notification — no response
    return {"jsonrpc": "2.0", "id": rpc_id, "result": result}



# ─── Dispatcher ───────────────────────────────────────────────────────────────

async def _dispatch(
    queue: asyncio.Queue,
    rpc_id,
    method: str,
    params: dict,
    token: Optional[str],
    services: ServiceContainer,
):
    """
    Run the method and push a JSON-RPC response onto the SSE queue.

    Messages without an id (``rpc_id is None``) are notifications: they are
    executed, but no result or error is queued, because JSON-RPC 2.0 forbids
    replying to a notification.

    Args:
        queue: The SSE session queue that the open stream drains.
        rpc_id: The request id to echo back, or None for a notification.
        method: JSON-RPC method name.
        params: JSON-RPC params.
        token: Authenticated user id (or None), forwarded to the method handler.
        services: Service container forwarded to the method handler.
    """
    is_notification = rpc_id is None
    try:
        result = await _execute(method, params, token, services)
    except Exception as exc:
        logger.exception(f"[MCP] Tool error for '{method}': {exc}")
        if not is_notification:
            await queue.put({
                "jsonrpc": "2.0",
                "id": rpc_id,
                "error": {"code": -32000, "message": str(exc)},
            })
        return

    if not is_notification:
        await queue.put({"jsonrpc": "2.0", "id": rpc_id, "result": result})


async def _execute(method: str, params: dict, token: Optional[str], services: ServiceContainer):
    """
    Route a JSON-RPC method to its implementation.

    Supported methods: ``initialize``, ``ping``, ``tools/list``, ``tools/call``
    and any ``notifications/*`` message (accepted as a no-op).

    Args:
        method: JSON-RPC method name.
        params: JSON-RPC params; only read for ``tools/call``. None is treated
            as an empty object.
        token: Authenticated user id (or None), forwarded to the tool.
        services: Service container forwarded to the tool.

    Returns:
        The JSON-RPC ``result`` payload for the method.

    Raises:
        ValueError: For an unknown method, or when ``tools/call`` receives
            params/arguments that are not JSON objects.
    """

    # ── MCP Handshake ─────────────────────────────────────────────────────────
    if method == "initialize":
        return {
            "protocolVersion": MCP_VERSION,
            "capabilities": {"tools": {}},
            "serverInfo": {"name": "Cortex Hub", "version": "1.0.0"},
        }

    if method == "ping":
        return {}

    # ── Notifications ─────────────────────────────────────────────────────────
    # Clients send e.g. "notifications/initialized" as part of the handshake.
    # They need no action here, so accept them instead of reporting an error.
    if isinstance(method, str) and method.startswith("notifications/"):
        return {}

    # ── Tool Discovery ────────────────────────────────────────────────────────
    if method == "tools/list":
        node_id_prop = {"type": "string", "description": "Unique node ID"}
        session_id_prop = {"type": "string", "description": "Optional session ID"}
        return {
            "tools": [
                _tool_def("list_nodes",
                          "List all agent nodes in the Cortex swarm mesh and their status.",
                          {}),
                _tool_def("get_app_info",
                          "Get metadata about this Cortex Hub instance.",
                          {}),
                _tool_def("get_node_details",
                          "Get full details for a specific agent node.",
                          {"node_id": dict(node_id_prop)},
                          required=["node_id"]),
                _tool_def("list_agents",
                          "List all autonomous agents configured in the system.",
                          {}),
                _tool_def("list_skills",
                          "List all skill folders (tool libraries) registered in the system.",
                          {}),
                _tool_def("dispatch",
                          "Dispatch a shell command to a specific agent node.",
                          {
                              "node_id": dict(node_id_prop),
                              "command": {"type": "string", "description": "Command to execute"},
                              "session_id": dict(session_id_prop),
                          },
                          required=["node_id", "command"]),
                _tool_def("write_file",
                          "Create or update a file on a specific agent node.",
                          {
                              "node_id": dict(node_id_prop),
                              "path": {"type": "string", "description": "Path to file"},
                              "content": {"type": "string", "description": "Content to write (string)"},
                              "is_dir": {"type": "boolean", "description": "True if creating a directory"},
                              "session_id": dict(session_id_prop),
                          },
                          required=["node_id", "path"]),
                _tool_def("delete_file",
                          "Delete a file or directory on a specific agent node.",
                          {
                              "node_id": dict(node_id_prop),
                              "path": {"type": "string", "description": "Path to file or directory"},
                              "session_id": dict(session_id_prop),
                          },
                          required=["node_id", "path"]),
            ]
        }

    # ── Tool Execution ────────────────────────────────────────────────────────
    if method == "tools/call":
        if params is None:  # explicit JSON null
            params = {}
        if not isinstance(params, dict):
            raise ValueError("Invalid params for 'tools/call': expected an object.")

        name = params.get("name", "")
        args = params.get("arguments")
        if args is None:  # absent or explicit JSON null
            args = {}
        if not isinstance(args, dict):
            raise ValueError("Invalid arguments for 'tools/call': expected an object.")
        return await _call_tool(name, args, token, services)

    raise ValueError(f"Unknown method: '{method}'")


def _tool_def(name: str, description: str, properties: dict, required: list = None) -> dict:
    """
    Build one MCP tool descriptor.

    Args:
        name: Tool name.
        description: Human-readable description of the tool.
        properties: JSON-Schema property definitions for the tool's input object.
        required: Names of mandatory properties. When None or empty, the schema
            carries no ``required`` key.

    Returns:
        A dict with ``name``, ``description`` and an object-typed ``inputSchema``.
    """
    # Only contribute the "required" entry when there is something to require.
    required_part = {"required": required} if required else {}
    return {
        "name": name,
        "description": description,
        "inputSchema": {"type": "object", "properties": properties, **required_part},
    }


# ─── Tool Implementations ─────────────────────────────────────────────────────

async def _call_tool(name: str, args: dict, token: Optional[str], services: ServiceContainer) -> dict:
    """
    Execute a named tool and return a standard MCP content block.

    Database and node work runs in the event loop's default executor so the
    loop is not blocked.

    Args:
        name: Tool name (one of the names advertised by ``tools/list``).
        args: Tool arguments. None is treated as an empty object.
        token: Authenticated user id. Every tool except ``get_app_info``
            requires it.
        services: Service container providing ``mesh_service`` and ``orchestrator``.

    Returns:
        ``{"content": [{"type": "text", "text": ...}]}`` where the text is the
        tool's result (JSON-encoded unless it is already a string).

    Raises:
        ValueError: Unknown tool, missing authentication, missing or malformed
            arguments, or an inaccessible node in ``get_node_details``.
        HTTPException: 403 when a node-mutating tool is called while OIDC is
            disabled.
    """
    # Tools that change state on agent nodes.
    mutating_tools = ("dispatch", "write_file", "delete_file")

    if args is None:
        args = {}
    elif not isinstance(args, dict):
        raise ValueError("Tool arguments must be a JSON object.")

    # If OIDC is disabled, block any tool that could manipulate the swarm mesh:
    # plain Identity Claims are not secure enough for headless write operations.
    # This check must run before the tool handlers to have any effect.
    if name in mutating_tools and not settings.OIDC_ENABLED:
        raise HTTPException(
            status_code=403,
            detail="Swarm manipulation tools are disabled because OIDC is not configured."
        )

    def _ok(data) -> dict:
        text = data if isinstance(data, str) else json.dumps(data, indent=2, default=str)
        return {"content": [{"type": "text", "text": text}]}

    def _require_token(action: str) -> None:
        """Reject unauthenticated callers with the tool-specific message."""
        if not token:
            raise ValueError(f"Authentication required to {action}.")

    # Run DB queries in a thread pool so we don't block the event loop
    loop = asyncio.get_running_loop()

    async def _in_db(work):
        """Call ``work(db)`` inside a fresh DB session on the default executor."""
        def _runner():
            from app.db.session import get_db_session
            with get_db_session() as db:
                return work(db)
        return await loop.run_in_executor(None, _runner)

    if name == "list_nodes":
        _require_token("list nodes")

        def _list_nodes(db):
            # Use MeshService to filter nodes based on the authenticated user_id
            nodes = services.mesh_service.list_accessible_nodes(token, db)
            return {
                "nodes": [
                    {
                        "id": n.node_id,
                        "name": n.display_name,
                        "status": n.last_status,
                        "os": (n.capabilities or {}).get("os"),
                        "is_active": n.is_active,
                    }
                    for n in nodes
                ]
            }

        return _ok(await _in_db(_list_nodes))

    if name == "get_app_info":
        def _app_info(db):
            if token:
                # Filtered counts if authenticated
                nodes = services.mesh_service.list_accessible_nodes(token, db)
                total = len(nodes)
                online = sum(1 for n in nodes if n.last_status == "online")
            else:
                # Return zero counts if not authenticated
                total = online = 0

            return {
                "name": "Cortex Hub",
                "version": "1.0.0",
                "capabilities": ["swarms", "webmcp", "mcp-sse", "voice-chat", "rag"],
                "nodes": {"total": total, "online": online},
                "mcp_transport": "sse",
                "sse_endpoint": f"{settings.HUB_PUBLIC_URL}/api/v1/mcp/sse",
                "auth": {"oidc_enabled": settings.OIDC_ENABLED}
            }

        return _ok(await _in_db(_app_info))

    if name == "get_node_details":
        _require_token("get node details")
        node_id = args.get("node_id")
        if not node_id:
            raise ValueError("node_id is required.")

        def _node_details(db):
            # Enforce permission check before returning details
            try:
                services.mesh_service.require_node_access(token, node_id, db)
            except Exception as exc:
                # Reported to the caller as "not found" so node existence is not
                # revealed, but logged so real failures are not silently lost.
                logger.warning(f"[MCP] Access check failed for node '{node_id}': {exc}")
                return None

            n = services.mesh_service.get_node_or_404(node_id, db)
            return {
                "node_id": n.node_id,
                "display_name": n.display_name,
                "description": n.description,
                "status": n.last_status,
                "is_active": n.is_active,
                "capabilities": n.capabilities,
                "skill_config": n.skill_config,
                "registered_by": n.registered_by,
                "last_seen_at": str(n.last_seen_at) if n.last_seen_at else None,
            }

        details = await _in_db(_node_details)
        if details is None:
            raise ValueError(f"Node '{node_id}' not found.")
        return _ok(details)

    if name == "list_agents":
        _require_token("list agents")

        def _list_agents(db):
            from app.db import models
            # Basic hardening: Only show agents on nodes user can access
            accessible_nodes = services.mesh_service.list_accessible_nodes(token, db)
            node_ids = [n.node_id for n in accessible_nodes]

            rows = (
                db.query(models.AgentInstance)
                .filter(models.AgentInstance.mesh_node_id.in_(node_ids))
                .all()
            )
            return {
                "agents": [
                    {
                        "id": str(a.id),
                        "name": a.template.name if a.template else None,
                        "status": a.status,
                        "node": a.mesh_node_id,
                        "last_heartbeat": str(a.last_heartbeat) if a.last_heartbeat else None,
                        "total_runs": a.total_runs,
                        "quality_score": a.latest_quality_score,
                    }
                    for a in rows
                ]
            }

        return _ok(await _in_db(_list_agents))

    if name == "list_skills":
        _require_token("list skills")

        def _list_skills(db):
            from app.db import models
            rows = db.query(models.Skill).filter(models.Skill.is_enabled == True).all()  # noqa: E712
            return {
                "skills": [
                    {
                        "id": s.id,
                        "name": s.name,
                        "description": s.description,
                        "type": s.skill_type,
                    }
                    for s in rows
                ]
            }

        return _ok(await _in_db(_list_skills))

    if name == "dispatch":
        _require_token("dispatch tasks")
        node_id = args.get("node_id")
        command = args.get("command")
        session_id = args.get("session_id", "")

        if not node_id or not command:
            raise ValueError("node_id and command are required.")

        def _dispatch_command(db):
            # Same explicit access check as write_file/delete_file.
            # NOTE(review): dispatch_task receives the user id and may check
            # access itself — confirm; the explicit check is harmless either way.
            services.mesh_service.require_node_access(token, node_id, db)
            task_id = services.mesh_service.dispatch_task(
                node_id, command, token, db, session_id=session_id
            )
            return {"status": "accepted", "task_id": task_id}

        return _ok(await _in_db(_dispatch_command))

    if name == "write_file":
        _require_token("write files")
        node_id = args.get("node_id")
        path = args.get("path")
        content = args.get("content", "")
        is_dir = args.get("is_dir", False)
        session_id = args.get("session_id", "__fs_explorer__")

        if not node_id or not path:
            raise ValueError("node_id and path are required.")

        def _write(db):
            services.mesh_service.require_node_access(token, node_id, db)
            return services.orchestrator.assistant.write(
                node_id, path, content, is_dir, session_id=session_id
            )

        return _ok(await _in_db(_write))

    if name == "delete_file":
        _require_token("delete files")
        node_id = args.get("node_id")
        path = args.get("path")
        session_id = args.get("session_id", "__fs_explorer__")

        if not node_id or not path:
            raise ValueError("node_id and path are required.")

        def _delete(db):
            services.mesh_service.require_node_access(token, node_id, db)
            return services.orchestrator.assistant.rm(node_id, path, session_id=session_id)

        return _ok(await _in_db(_delete))

    raise ValueError(f"Unknown tool: '{name}'")