@router.get("/token-sync", summary="Agent Token Self-Recovery")
def token_sync(node_id: str, request: Request):
    """
    Return the current invite_token of an active node to an authenticated agent.

    Allows an agent to recover a fresh invite_token using only its stable
    secret_key. Called automatically by GrpcMeshTransport when a handshake
    is rejected.

    Args:
        node_id: Identifier of the node whose token is requested (query parameter).
        request: Incoming request. Its ``X-Agent-Token`` header must equal the
            hub SECRET_KEY (not the per-node token).

    Returns:
        JSONResponse carrying ``node_id`` and the node's ``invite_token``.

    Raises:
        HTTPException: 401 when the header is missing or wrong, or when the hub
            has no SECRET_KEY configured; 404 when no active node matches
            ``node_id``.
    """
    import hmac

    from app.config import settings

    expected = settings.SECRET_KEY or ""
    supplied = request.headers.get("X-Agent-Token", "")
    # Fail closed: with an empty/unset SECRET_KEY a request carrying no header
    # would otherwise compare "" == "" and be handed node credentials.
    # compare_digest keeps the comparison time independent of where the
    # strings first differ; bytes are used so non-ASCII values cannot raise.
    authorized = bool(expected) and hmac.compare_digest(
        supplied.encode("utf-8"), expected.encode("utf-8")
    )
    if not authorized:
        logger.warning("[AgentUpdate] Rejected token sync request for node %r", node_id)
        raise HTTPException(status_code=401, detail="Unauthorized")

    from app.db.session import SessionLocal
    from app.db.models.node import AgentNode

    db = SessionLocal()
    try:
        node = db.query(AgentNode).filter(
            AgentNode.node_id == node_id,
            AgentNode.is_active == True,  # noqa: E712 - SQLAlchemy column expression, not a Python bool test
        ).first()
        if not node:
            raise HTTPException(status_code=404, detail=f"Node '{node_id}' not found or inactive.")
        logger.info("[AgentUpdate] Token sync for node '%s'", node_id)
        return JSONResponse({"node_id": node_id, "invite_token": node.invite_token})
    finally:
        # Always hand the session back, including on the 404 path.
        db.close()
""" - def __init__(self, node_id: str, stub_factory: Callable[[], tuple], auth_token: str = ""): + def __init__(self, node_id: str, stub_factory: Callable[[], tuple], auth_token: str = "", + hub_http_url: str = "", secret_key: str = ""): self.node_id = node_id self.stub_factory = stub_factory # Callable returning (stub, channel) self.auth_token = auth_token + self.hub_http_url = hub_http_url.rstrip("/") + self.secret_key = secret_key self.listener = None self.stub = None self.channel = None @@ -26,6 +33,7 @@ self._connected = False self._health_thread_started = False self.last_activity = 0 + def handshake(self) -> bool: self._refresh_stub() try: @@ -41,11 +49,73 @@ return True else: logger.error(f"[Mesh] Handshake REJECTED for {self.node_id}: {res.error_message}") + if self.hub_http_url and self.secret_key: + logger.info(f"[Mesh] Attempting token self-recovery for {self.node_id}...") + if self._try_token_recovery(): + return True return False except Exception as e: logger.error(f"[Mesh] Handshake FAILED for {self.node_id}: {e}") return False + def _try_token_recovery(self) -> bool: + """Fetches a fresh invite_token from the hub using the stable secret_key.""" + try: + url = f"{self.hub_http_url}/api/v1/agent/token-sync?node_id={self.node_id}" + req = urllib.request.Request(url, headers={"X-Agent-Token": self.secret_key}) + with urllib.request.urlopen(req, timeout=10) as resp: + data = json.loads(resp.read().decode()) + new_token = data.get("invite_token", "") + if not new_token: + logger.error("[Mesh] Token recovery: hub returned empty token") + return False + logger.info(f"[Mesh] Token recovery successful — updating auth_token") + self.auth_token = new_token + self._persist_token(new_token) + # Retry handshake with fresh token + req2 = agent_pb2.RegistrationRequest( + node_id=self.node_id, + auth_token=self.auth_token, + node_description="Portable Mesh Node" + ) + res2 = self.stub.SyncConfiguration(req2) + if res2.success: + logger.info(f"[Mesh] Handshake 
successful after token recovery for {self.node_id}") + return True + logger.error(f"[Mesh] Handshake still rejected after token recovery: {res2.error_message}") + return False + except Exception as e: + logger.error(f"[Mesh] Token recovery failed: {e}") + return False + + def _persist_token(self, new_token: str): + """Writes the recovered token to all known config file locations.""" + candidates = [] + # Authoritative path: next to src/ (two levels above this install) + try: + src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) + candidates.append(os.path.join(src_dir, "agent_config.yaml")) + except Exception: + pass + # Ghost/user path + candidates.append(os.path.expanduser("~/.cortex/agent-node/agent_config.yaml")) + candidates.append(os.path.expanduser("~/.cortex/agent.yaml")) + + import re + token_re = re.compile(r'^(\s*(?:auth_token|invite_token)\s*:\s*).*$', re.MULTILINE) + for path in candidates: + if not os.path.exists(path): + continue + try: + with open(path, "r", encoding="utf-8") as f: + content = f.read() + updated = token_re.sub(lambda m: m.group(1) + new_token, content) + with open(path, "w", encoding="utf-8") as f: + f.write(updated) + logger.info(f"[Mesh] Persisted new token to {path}") + except Exception as e: + logger.warning(f"[Mesh] Could not persist token to {path}: {e}") + def connect(self): self._stop_event.clear() self._refresh_stub() diff --git a/reinstall_windows_agent.ps1 b/reinstall_windows_agent.ps1 index cccf305..66da338 100644 --- a/reinstall_windows_agent.ps1 +++ b/reinstall_windows_agent.ps1 @@ -1,12 +1,76 @@ -$taskName = "CortexAgent" -$pythonPath = "C:\CortexAgent\venv\Scripts\python.exe" -$mainPath = "C:\CortexAgent\src\agent_node\main.py" -$logPath = "C:\CortexAgent\agent.log" +param( + [string]$Token = "" +) +$taskName = "CortexAgent" +$agentRoot = "C:\CortexAgent" +$pythonPath = "$agentRoot\venv\Scripts\python.exe" +$mainPath = "$agentRoot\src\agent_node\main.py" +$logPath = 
"$agentRoot\agent.log" +$configPath = "$agentRoot\agent_config.yaml" +$ghostRoot = "$env:USERPROFILE\.cortex\agent-node" +$ghostBat = "$ghostRoot\start_agent.bat" +$ghostConfig = "$ghostRoot\agent_config.yaml" + +Write-Host "[1/6] Killing all Python processes..." +Get-Process -Name python, python3 -ErrorAction SilentlyContinue | Stop-Process -Force +Start-Sleep -Seconds 2 + +Write-Host "[2/6] Disabling ghost startup bat (if present)..." +if (Test-Path $ghostBat) { + Rename-Item -Path $ghostBat -NewName "start_agent.bat.disabled" -Force + Write-Host " Renamed $ghostBat -> start_agent.bat.disabled" +} + +Write-Host "[3/6] Syncing ghost config from authoritative config..." +if ((Test-Path $configPath) -and (Test-Path $ghostRoot)) { + Copy-Item -Path $configPath -Destination $ghostConfig -Force + Write-Host " Copied $configPath -> $ghostConfig" +} + +Write-Host "[4/6] Updating auth token (if -Token provided)..." +if ($Token -ne "") { + # Update the authoritative config + if (Test-Path $configPath) { + $content = Get-Content $configPath -Raw + $content = $content -replace '(?m)^(\s*(?:auth_token|invite_token)\s*:\s*).*$', "`${1}$Token" + Set-Content -Path $configPath -Value $content -NoNewline + Write-Host " Updated token in $configPath" + } + # Update the ghost config too + if (Test-Path $ghostConfig) { + $content = Get-Content $ghostConfig -Raw + $content = $content -replace '(?m)^(\s*(?:auth_token|invite_token)\s*:\s*).*$', "`${1}$Token" + Set-Content -Path $ghostConfig -Value $content -NoNewline + Write-Host " Updated token in $ghostConfig" + } +} + +Write-Host "[5/6] Re-registering Scheduled Task with auto-restart..." 
$action = New-ScheduledTaskAction -Execute "cmd.exe" -Argument "/c `"$pythonPath $mainPath >> $logPath 2>&1`"" $principal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest -$settings = New-ScheduledTaskSettingsSet -AllowStartIfOnBatteries -DontStopIfGoingOnBatteries -StartWhenAvailable +$settings = New-ScheduledTaskSettingsSet ` + -AllowStartIfOnBatteries ` + -DontStopIfGoingOnBatteries ` + -StartWhenAvailable ` + -RestartCount 3 ` + -RestartInterval (New-TimeSpan -Minutes 1) Unregister-ScheduledTask -TaskName $taskName -Confirm:$false -ErrorAction SilentlyContinue Register-ScheduledTask -TaskName $taskName -Action $action -Principal $principal -Settings $settings Start-ScheduledTask -TaskName $taskName + +Write-Host "[6/6] Verifying startup (waiting 5s)..." +Start-Sleep -Seconds 5 + +$running = Get-ScheduledTask -TaskName $taskName | Select-Object -ExpandProperty State +$procs = (Get-Process -Name python -ErrorAction SilentlyContinue).Count +Write-Host "" +Write-Host "=== Result ===" +Write-Host " Task state : $running" +Write-Host " Python PIDs: $procs" +if ($running -eq "Running") { + Write-Host " [OK] CortexAgent is running." +} else { + Write-Host " [WARN] Task is not in Running state. Check $logPath for errors." +}