# cortex-hub / agent-node / src / agent_node / main.py
import sys
import os

# Add root to path to find protos and other packages
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

import signal
import time
from agent_node.config import NODE_ID, HUB_URL, AUTH_TOKEN, SECRET_KEY, AUTO_UPDATE, UPDATE_CHECK_INTERVAL
from agent_node.core import updater

def main():
    """Boot the agent node and keep it running until the process is signalled.

    Steps, in order:

    0. Configure root logging to stdout at INFO level.
    1. If ``AUTO_UPDATE`` is set, initialise the updater, run a one-off update
       check and start the background updater. Any ``Exception`` raised here
       is reported and boot continues.
    2. Loop forever: import and construct ``AgentNode``, install the
       SIGINT/SIGTERM handlers, sync configuration, start health reporting and
       run the task stream in the foreground. When an attempt ends -- through
       an exception or through the task stream returning -- the node built by
       that attempt is stopped (best-effort) and a new attempt starts after a
       fixed delay.

    Returns:
        Never returns normally. The process leaves through ``sys.exit(0)`` in
        the signal handler, or through an exception that is not an
        ``Exception`` subclass (e.g. ``KeyboardInterrupt``) propagating out.
    """
    import logging
    import traceback

    retry_delay_secs = 10  # pause between boot attempts

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    print(f"[*] Starting Agent Node: {NODE_ID}...")

    # 0. Auto-Update Check (before anything else — if we're behind, restart now)
    if AUTO_UPDATE:
        try:
            updater.init(
                hub_http_url=HUB_URL,
                auth_token=SECRET_KEY,
                check_interval_secs=UPDATE_CHECK_INTERVAL,
            )
            updater.check_and_update_once()  # May restart process — does not return if update applied

            # Start the background updater BEFORE initializing the node, so
            # that a node which cannot even be constructed (e.g. a missing
            # dependency) can still pick up a fix pushed to the hub.
            updater.start_background_updater()
        except Exception as e:
            print(f"[!] Updater initialization failed: {e}. Moving on to agent boot...", flush=True)

    # The node owned by the current boot attempt; None while between attempts.
    active_node = None

    # Signal handler for graceful shutdown. It reads `active_node` at signal
    # time, so it always targets the node of the current attempt and does not
    # stop a node a second time once the retry path has already released it.
    def handle_exit(sig, frame):
        if active_node is not None:
            active_node.stop()
        sys.exit(0)

    # 1. Initialization and Main Persistence Loop
    while True:
        try:
            # Deferred import: importing inside the loop means a broken
            # release (e.g. missing dependency) raises here, is caught below,
            # and the process stays alive for the background updater.
            from agent_node.node import AgentNode

            active_node = AgentNode()

            # 2. Signal handling is (re-)installed only once a node exists,
            # matching the point at which a graceful stop becomes meaningful.
            signal.signal(signal.SIGINT, handle_exit)
            signal.signal(signal.SIGTERM, handle_exit)

            # Handshake: Sync configuration and Sandbox Policy
            active_node.sync_configuration()

            # 3. Background: Start health reporting (Heartbeats)
            active_node.start_health_reporting()

            # 4. Foreground: Run Persistent Task Stream (expected to block)
            active_node.run_task_stream()

            # Reaching this line means the stream returned without raising.
            # Treat it like a crash: report it and go through the same delayed
            # retry instead of spinning straight into the next boot.
            print(
                f"[!] Task stream ended without an error. Retrying boot in {retry_delay_secs}s...",
                flush=True,
            )
        except Exception as e:
            print(f"[!] Main Agent process crashed: {e}. Retrying boot in {retry_delay_secs}s...", flush=True)
            traceback.print_exc()

        # Release the node from the finished attempt before building a new one,
        # so its background work does not pile up across retries. Clearing
        # `active_node` first keeps the signal handler from stopping it again.
        if active_node is not None:
            stale_node, active_node = active_node, None
            try:
                stale_node.stop()
            except Exception as stop_err:
                # Best-effort cleanup: a failing stop() must not end the loop.
                print(f"[!] Failed to stop previous node instance: {stop_err}", flush=True)

        time.sleep(retry_delay_secs)

# Script entry point: start the agent only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()