#!/usr/bin/env python3
# cortex-hub / agent-node / bootstrap_installer.py
"""
Cortex Agent Node — Bootstrap Installer
========================================
This is the ONLY file a new user needs to download.
It handles everything else automatically:

  1. Connects to the Cortex Hub and downloads the full agent-node code
  2. Installs Python dependencies
  3. Launches the agent (which then handles all future self-updates)

Usage:
    python3 bootstrap_installer.py --hub https://ai.jerxie.com --token <your-token> --node-id my-node

Or with an agent_config.yaml in the same directory — see docs.
"""

import os
import sys
import json
import shutil
import tarfile
import tempfile
import argparse
import subprocess
import socket
import urllib.request
import urllib.error

# ── Minimal defaults — overridden by CLI args or agent_config.yaml ────────────
# Hub base URL. Deliberately empty: it has to be supplied via --hub or a config file.
DEFAULT_HUB = ""
# Default install location: ~/.cortex/agent-node under the current user's home.
INSTALL_DIR = os.path.expanduser(os.path.join("~", ".cortex", "agent-node"))


def _print(msg: str):
    """Write *msg* to stdout behind the ``[Cortex Bootstrap]`` tag and flush at once."""
    tagged = "[Cortex Bootstrap] {}".format(msg)
    print(tagged, flush=True)


def _fetch_version(hub_url: str, token: str) -> str:
    """Ask the hub which agent version it currently serves.

    Issues a request to ``<hub_url>/api/v1/agent/version`` with *token* in the
    ``X-Agent-Token`` header (15 second timeout) and returns the ``version``
    field of the JSON reply, or ``"unknown"`` when the field is absent.

    Any exception along the way is reported through ``_print`` and the
    process exits with status 1.
    """
    endpoint = hub_url.rstrip("/") + "/api/v1/agent/version"
    try:
        request = urllib.request.Request(endpoint, headers={"X-Agent-Token": token})
        with urllib.request.urlopen(request, timeout=15) as response:
            body = response.read().decode()
        payload = json.loads(body)
        return payload.get("version", "unknown")
    except Exception as exc:
        _print(f"ERROR: Could not reach hub at {endpoint}: {exc}")
        sys.exit(1)


def _download_tarball(hub_url: str, token: str, dest_path: str):
    """Stream the agent archive from the hub into the file at *dest_path*.

    Requests ``<hub_url>/api/v1/agent/download`` with *token* in the
    ``X-Agent-Token`` header (120 second timeout), copies the response to
    disk in 64 KiB pieces and rewrites a one-line KB counter on stdout after
    each piece.

    Any exception is reported through ``_print`` and the process exits with
    status 1.
    """
    endpoint = "{}/api/v1/agent/download".format(hub_url.rstrip("/"))
    _print(f"Downloading agent from {endpoint} ...")
    try:
        request = urllib.request.Request(endpoint, headers={"X-Agent-Token": token})
        with urllib.request.urlopen(request, timeout=120) as response:
            with open(dest_path, "wb") as out:
                received = 0
                # iter() with a sentinel stops on the empty read that marks end-of-stream.
                for piece in iter(lambda: response.read(65536), b""):
                    out.write(piece)
                    received += len(piece)
                    print(f"\r  {received // 1024} KB downloaded...", end="", flush=True)
        # Terminate the carriage-return progress line before the next message.
        print()
        _print("Download complete.")
    except Exception as exc:
        _print(f"ERROR: Download failed: {exc}")
        sys.exit(1)


def _archive_member_escapes(member, install_dir: str) -> bool:
    """Return True when extracting *member* could touch a path outside *install_dir*.

    Both the member's own destination and, for symlinks/hardlinks, the link
    target are resolved with ``os.path.realpath`` so that ``..`` components,
    absolute names and symlinks already present on disk are all accounted for.
    """
    root = os.path.realpath(install_dir)
    dest = os.path.join(root, member.name)
    to_check = [dest]
    if member.issym():
        # Symlink targets are relative to the directory holding the link.
        to_check.append(os.path.join(os.path.dirname(dest), member.linkname))
    elif member.islnk():
        # Hardlink targets are relative to the extraction root.
        to_check.append(os.path.join(root, member.linkname))

    for path in to_check:
        try:
            if os.path.commonpath([root, os.path.realpath(path)]) != root:
                return True
        except ValueError:
            # Raised e.g. for paths on different drives — definitely not inside root.
            return True
    return False


def _install(hub_url: str, token: str, install_dir: str):
    """Download the agent-node archive from the hub and unpack it into *install_dir*.

    The archive is fetched into a private temporary directory (always removed
    afterwards). Existing contents of *install_dir* are deleted on a
    best-effort basis, then every archive member is extracted with a leading
    ``agent-node/`` prefix stripped from its name.

    Members whose destination or link target would resolve outside
    *install_dir* are refused: an error is printed and the process exits with
    status 1. Errors raised by ``tarfile`` itself propagate to the caller.
    """
    tmp_dir = tempfile.mkdtemp(prefix="cortex_bootstrap_")
    try:
        tarball = os.path.join(tmp_dir, "agent.tar.gz")
        _download_tarball(hub_url, token, tarball)

        _print(f"Extracting to {install_dir} ...")
        if os.path.exists(install_dir):
            try:
                # We try to clean up, but ignore errors for read-only mounts (docker)
                for item in os.listdir(install_dir):
                    item_path = os.path.join(install_dir, item)
                    try:
                        # rmtree refuses symlinks, so a symlinked directory
                        # has to be unlinked like a plain file.
                        if os.path.isdir(item_path) and not os.path.islink(item_path):
                            shutil.rmtree(item_path)
                        else:
                            os.unlink(item_path)
                    except Exception:
                        continue  # Skip things we can't delete
            except Exception as e:
                _print(f"    [!] Warning: Could not fully clean install dir: {e}")
        os.makedirs(install_dir, exist_ok=True)

        # Use the stdlib "data" extraction filter where the running Python
        # provides it; the manual check below covers older interpreters.
        extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

        with tarfile.open(tarball, "r:gz") as tar:
            for member in tar.getmembers():
                # Safely strip the top-level 'agent-node/' prefix only if it exists
                if member.name.startswith("agent-node/"):
                    member.name = member.name.replace("agent-node/", "", 1)

                if not member.name:
                    continue

                if _archive_member_escapes(member, install_dir):
                    _print(f"ERROR: Refusing to extract unsafe archive member: {member.name}")
                    sys.exit(1)

                tar.extract(member, install_dir, **extract_kwargs)

        _print("Extraction complete.")
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)


def _install_deps(install_dir: str, skip_browsers: bool = False):
    """Install the agent's Python requirements and, optionally, Playwright's browser.

    Steps (dependency and browser failures are reported, never raised):
      1. Return immediately when ``<install_dir>/requirements.txt`` is absent.
      2. Check that ``pip`` runs under the current interpreter; if not, try
         to bootstrap it with ``ensurepip``.
      3. Run ``pip install -r requirements.txt``; if that fails, retry once
         with ``--user``.
      4. Unless *skip_browsers* is true, install Playwright's chromium build
         when the ``playwright`` package can be imported.

    Args:
        install_dir: Directory holding ``requirements.txt``; also used as the
            working directory for the pip calls.
        skip_browsers: When true, step 4 is skipped.
    """
    req_file = os.path.join(install_dir, "requirements.txt")
    if not os.path.exists(req_file):
        _print("No requirements.txt found — skipping dependency install.")
        return

    _print("Checking for pip...")
    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "--version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        _print("pip not found. Attempting to bootstrap pip via ensurepip...")
        try:
            subprocess.check_call(
                [sys.executable, "-m", "ensurepip", "--default-pip"],
                stdout=subprocess.DEVNULL,
            )
            _print("pip bootstrapped successfully.")
        except Exception as e:
            _print(f"Warning: Failed to bootstrap pip: {e}. If dependencies fail, please install python3-pip manually.")

    _print("Installing Python dependencies (resilient mode) ...")
    # --ignore-installed bypasses "no RECORD file found" metadata errors
    # common on Mac/Anaconda.
    args = [sys.executable, "-m", "pip", "install", "-r", req_file, "--quiet", "--ignore-installed"]
    try:
        try:
            subprocess.check_call(args, cwd=install_dir)
        except subprocess.CalledProcessError:
            # Typically means site-packages is not writable; fall back to
            # the per-user site.
            _print("Standard install failed. Trying --user install...")
            subprocess.check_call(args + ["--user"], cwd=install_dir)
    except Exception as e:
        _print(f"ERROR: Failed to install dependencies: {e}")
        _print("The agent might fail to start if core libraries (grpcio, psutil) are missing.")
        return

    _print("Dependencies installed.")

    if skip_browsers:
        _print("Skipping Playwright browser installation as requested.")
        return

    try:
        # Probe in a fresh interpreter: packages installed a moment ago may
        # be invisible to this process (stale import caches, or a user
        # site-packages directory that did not exist when it started).
        probe = subprocess.run(
            [sys.executable, "-c", "import playwright"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if probe.returncode != 0:
            return  # No playwright needed

        _print("Playwright detected. Installing chromium browser...")
        subprocess.run([sys.executable, "-m", "playwright", "install", "chromium"], check=True)
        _print("Playwright browsers installed.")
    except Exception as e:
        _print(f"Warning: Failed to install Playwright browsers: {e}")

def _write_config(install_dir: str, node_id: str, hub_url: str, token: str, grpc_endpoint: str, secret_key: str = None):
    """Write ``agent_config.yaml`` into *install_dir*.

    The file records the node id, hub URL, gRPC endpoint, *token* (stored
    under both ``auth_token`` and ``invite_token``), *secret_key*, and fixed
    auto-update settings.

    PyYAML is used when it can be imported. Otherwise the flat mapping is
    written by hand as ``key: value`` lines, with each value JSON-encoded so
    that ``None``/``True`` become the YAML scalars ``null``/``true`` and
    strings are safely quoted.
    """
    config = {
        "node_id": node_id,
        "hub_url": hub_url,
        "grpc_endpoint": grpc_endpoint,
        "auth_token": token,
        "invite_token": token,
        "secret_key": secret_key,
        "auto_update": True,
        "update_check_interval": 300,
    }
    config_path = os.path.join(install_dir, "agent_config.yaml")

    # The import must sit inside the try: on a fresh machine PyYAML may not
    # be installed, and that must select the fallback instead of crashing.
    try:
        import yaml
    except ImportError:
        yaml = None

    with open(config_path, "w") as f:
        if yaml is not None:
            yaml.dump(config, f, default_flow_style=False)
        else:
            # yaml not installed — write manually
            f.writelines(f"{key}: {json.dumps(value)}\n" for key, value in config.items())

    if yaml is not None:
        _print(f"Config written to {config_path}")
    else:
        _print(f"Config written (raw) to {config_path}")


def _launch(install_dir: str, as_daemon: bool = False):
    """Start the installed agent; this function does not return normally.

    Daemon mode (*as_daemon* true): runs ``<install_dir>/install_service.py``
    with the current interpreter and exits with status 0 when it succeeds.
    Exits with status 1 when the script is missing or returns a non-zero
    status.

    Foreground mode: replaces the current process with
    ``<install_dir>/src/agent_node/main.py`` via ``os.execv``. Exits with
    status 1 when that entry point is missing.
    """
    if as_daemon:
        _print("Installing as a background daemon service...")
        daemon_script = os.path.join(install_dir, "install_service.py")
        if not os.path.exists(daemon_script):
            _print(f"ERROR: install_service.py not found at {daemon_script}")
            sys.exit(1)

        os.chdir(install_dir)
        # NOTE(review): this only changes sys.path of the bootstrap process;
        # the child interpreter below does not inherit it.
        sys.path.insert(0, install_dir)
        result = subprocess.run([sys.executable, daemon_script])
        if result.returncode != 0:
            # Don't claim success when the service installer itself failed.
            _print(f"ERROR: install_service.py exited with status {result.returncode}")
            sys.exit(1)
        _print("Bootstrap complete. Agent is running in the background.")
        sys.exit(0)

    entry = os.path.join(install_dir, "src", "agent_node", "main.py")
    if not os.path.exists(entry):
        _print(f"ERROR: agent entry point not found at {entry}")
        sys.exit(1)

    _print(f"Launching agent in foreground: {sys.executable} {entry}")
    # execv discards this process image, so flush buffered output first.
    sys.stdout.flush()
    sys.stderr.flush()
    os.chdir(install_dir)
    # NOTE(review): intended to make imports resolve, but sys.path is not
    # carried across execv; the new interpreter builds its own path.
    sys.path.insert(0, install_dir)
    os.execv(sys.executable, [sys.executable, entry])


def _coerce_config_scalar(raw: str):
    """Turn the right-hand side of a hand-parsed ``key: value`` line into a Python value.

    Quoted text is unquoted and kept as a string; ``null``/``None``/``~``/empty
    become ``None``; ``true``/``false`` (any case) become booleans; everything
    else is returned as the stripped string.
    """
    text = raw.strip()
    if len(text) >= 2 and text[0] == text[-1] and text[0] in ("'", '"'):
        return text[1:-1]
    lowered = text.lower()
    if lowered in ("", "~", "null", "none"):
        return None
    if lowered in ("true", "false"):
        return lowered == "true"
    return text


def _load_existing_config(config_paths) -> dict:
    """Return the first readable config mapping among *config_paths*, else ``{}``.

    Uses ``yaml.safe_load`` when PyYAML can be imported; otherwise falls back
    to a flat ``key: value`` line parser. Unreadable or non-mapping files are
    reported and skipped.
    """
    try:
        import yaml
    except ImportError:
        yaml = None

    for config_path in config_paths:
        if not os.path.exists(config_path):
            continue
        try:
            with open(config_path) as f:
                if yaml is not None:
                    loaded = yaml.safe_load(f) or {}
                else:
                    # Resilient fallback for fresh environments: manual parsing
                    loaded = {}
                    for line in f:
                        stripped = line.strip()
                        if not stripped or stripped.startswith("#") or ":" not in stripped:
                            continue
                        key, value = stripped.split(":", 1)
                        loaded[key.strip()] = _coerce_config_scalar(value)
        except Exception as e:
            _print(f"Warning: Could not read config {config_path}: {e}")
            continue

        if not isinstance(loaded, dict):
            _print(f"Warning: Ignoring config {config_path}: top level is not a mapping")
            continue

        if yaml is not None:
            _print(f"Loaded existing config from {config_path}")
        else:
            _print(f"Loaded existing config (manual parse) from {config_path}")
        return loaded
    return {}


def _default_grpc_endpoint(hub_url: str, port: int = 50051) -> str:
    """Derive ``host:port`` for gRPC from the hub's HTTP URL.

    Only the host name is kept, so a port, path or trailing slash in
    *hub_url* does not leak into the endpoint. IPv6 literals are bracketed.
    """
    from urllib.parse import urlsplit

    candidate = hub_url.strip()
    try:
        # Without a scheme urlsplit would treat the host as a path; the "//"
        # prefix makes it parse as a network location.
        host = urlsplit(candidate if "://" in candidate else f"//{candidate}").hostname
    except ValueError:
        host = None

    if not host:
        # Could not parse — fall back to plain scheme stripping.
        host = candidate.replace("https://", "").replace("http://", "").rstrip("/")
    elif ":" in host:
        host = f"[{host}]"
    return f"{host}:{port}"


def main():
    """Command-line entry point: resolve settings, install or update the agent, launch it.

    Settings come from CLI flags first, then from an ``agent_config.yaml`` in
    the current directory or the install directory, then (for the token) the
    ``AGENT_AUTH_TOKEN`` environment variable. Exits with status 1 when no
    hub URL or token can be determined.

    If the installed ``VERSION`` already matches the hub's and neither
    ``--update-only`` nor ``--daemon`` was given, the existing installation
    is launched directly. Otherwise the code is downloaded, dependencies are
    installed, the config is rewritten, and — unless ``--update-only`` — the
    agent is launched.
    """
    parser = argparse.ArgumentParser(
        description="Cortex Agent Node Bootstrap Installer",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 bootstrap_installer.py --hub https://ai.jerxie.com --token abc123 --node-id my-laptop
  python3 bootstrap_installer.py  # reads from agent_config.yaml in current dir
        """
    )
    parser.add_argument("--hub", default=None, help=f"Hub URL (default: {DEFAULT_HUB})")
    parser.add_argument("--token", default=None, help="Agent auth token")
    parser.add_argument("--node-id", default=None, help="Unique node identifier")
    parser.add_argument("--grpc", default=None, help="gRPC endpoint (default: derived from hub URL)")
    parser.add_argument("--install-dir", default=INSTALL_DIR, help=f"Install path (default: {INSTALL_DIR})")
    parser.add_argument("--update-only", action="store_true", help="Only pull latest code, don't re-launch")
    parser.add_argument("--daemon", action="store_true", help="Install and run as a persistent background daemon (macOS/Linux)")
    parser.add_argument("--skip-browsers", action="store_true", help="Skip automatic Playwright browser installation")
    args = parser.parse_args()

    # Try loading existing config for defaults.
    # Look in CWD first, then in the target install_dir.
    install_dir = args.install_dir
    existing_config = _load_existing_config(
        ["agent_config.yaml", os.path.join(install_dir, "agent_config.yaml")]
    )

    hub_url = args.hub or existing_config.get("hub_url") or DEFAULT_HUB
    env_token = os.getenv("AGENT_AUTH_TOKEN", "")

    # 1. Token for Hub HTTP API (fetching versions, downloading code)
    # This MUST be the hub's global SECRET_KEY.
    hub_token = args.token or existing_config.get("secret_key") or existing_config.get("auth_token") or env_token

    # 2. Token for Node gRPC Authentication (handshake/registration)
    # This is the node-specific invite_token.
    node_token = args.token or existing_config.get("auth_token") or env_token

    # Validate before deriving anything from hub_url.
    if not hub_url:
        _print("ERROR: --hub is required (or set in agent_config.yaml)")
        sys.exit(1)

    if not hub_token:
        _print("ERROR: --token is required (or set AGENT_AUTH_TOKEN env var)")
        sys.exit(1)

    node_id = args.node_id or existing_config.get("node_id") or socket.gethostname() or "cortex-node"

    # Ensure grpc endpoint has a port
    grpc = args.grpc or existing_config.get("grpc_endpoint")
    if not grpc:
        grpc = _default_grpc_endpoint(hub_url)
    elif ":" not in grpc:
        grpc = f"{grpc}:50051"

    _print(f"Hub: {hub_url}")
    remote_version = _fetch_version(hub_url, hub_token)
    _print(f"Remote agent version: {remote_version}")

    # Check if already installed and up to date
    local_version_file = os.path.join(install_dir, "VERSION")
    if os.path.exists(local_version_file):
        with open(local_version_file) as f:
            local_version = f.read().strip()
        if local_version == remote_version:
            if not args.update_only and not args.daemon:
                _print(f"Already at {local_version} — launching existing installation.")
                _launch(install_dir, as_daemon=args.daemon)
                return  # not reached on a successful launch; kept as a safety net
            # --update-only / --daemon force a fresh install of the same version.
            _print(f"Re-installing {local_version}")
        else:
            _print(f"Updating {local_version} → {remote_version}")

    skip_browsers = args.skip_browsers or bool(existing_config.get("skip_browsers", False))

    _install(hub_url, hub_token, install_dir)
    _install_deps(install_dir, skip_browsers=skip_browsers)
    _write_config(install_dir, node_id, hub_url, node_token, grpc, secret_key=hub_token)

    if args.update_only:
        _print(f"✅ Updated to v{remote_version}. Not launching (--update-only).")
        return

    _print(f"✅ Agent v{remote_version} installed at {install_dir}")
    _launch(install_dir, as_daemon=args.daemon)  # replaces this process or exits


# Run the installer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()