diff --git a/agent-node/VERSION b/agent-node/VERSION
index a970716..8fc77d0 100644
--- a/agent-node/VERSION
+++ b/agent-node/VERSION
@@ -1 +1 @@
-1.0.15
+1.0.17
diff --git a/agent-node/src/agent_node/main.py b/agent-node/src/agent_node/main.py
index c4177ca..1085314 100644
--- a/agent-node/src/agent_node/main.py
+++ b/agent-node/src/agent_node/main.py
@@ -5,7 +5,7 @@
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 
 import signal
-from agent_node.node import AgentNode
+import time
 from agent_node.config import NODE_ID, HUB_URL, AUTH_TOKEN, SECRET_KEY, AUTO_UPDATE, UPDATE_CHECK_INTERVAL
 from agent_node.core import updater
 
@@ -19,33 +19,65 @@
     print(f"[*] Starting Agent Node: {NODE_ID}...")
 
     # 0. Auto-Update Check (before anything else — if we're behind, restart now)
+    # This uses only standard libraries, making it extremely resilient.
     if AUTO_UPDATE:
-        updater.init(hub_http_url=HUB_URL, auth_token=SECRET_KEY, check_interval_secs=UPDATE_CHECK_INTERVAL)
-        updater.check_and_update_once()  # May restart process — does not return if update applied
+        try:
+            updater.init(hub_http_url=HUB_URL, auth_token=SECRET_KEY, check_interval_secs=UPDATE_CHECK_INTERVAL)
+            updater.check_and_update_once()  # May restart process — does not return if update applied
+
+            # Start background updater BEFORE initializing the node.
+            # This ensures that even if AgentNode() crashes (e.g. missing grpcio),
+            # the node can still self-repair if a fix is pushed to the hub.
+            updater.start_background_updater()
+        except Exception as e:
+            print(f"[!] Updater initialization failed: {e}. Moving on to agent boot...")
 
-    # 1. Initialization
-    node = AgentNode()
-
-    # 2. Signal Handling for Graceful Shutdown
-    def handle_exit(sig, frame):
-        node.stop()
-        sys.exit(0)
-
-    signal.signal(signal.SIGINT, handle_exit)
-    signal.signal(signal.SIGTERM, handle_exit)
+    # 1. Initialization and Main Persistence Loop
+    # `node` stays None whenever no AgentNode is running (before first boot, after a crash).
+    node = None
+    while True:
+        try:
+            # Deferred Import: We import the core AgentNode inside the loop.
+            # This ensures that even if a future update breaks the code (e.g. missing dependency),
+            # the process STILL stays alive and the Background Updater (started above)
+            # can still pull a new fix when it becomes available on the hub.
+            from agent_node.node import AgentNode
+
+            node = AgentNode()
+
+            # 2. Signal Handling for Graceful Shutdown
+            def handle_exit(sig, frame):
+                # Between boots there is no node to stop.
+                if node is not None:
+                    node.stop()
+                sys.exit(0)
+
+            signal.signal(signal.SIGINT, handle_exit)
+            signal.signal(signal.SIGTERM, handle_exit)
 
-    # Handshake: Sync configuration and Sandbox Policy
-    node.sync_configuration()
-
-    # 3. Background: Start health reporting (Heartbeats)
-    node.start_health_reporting()
+            # Handshake: Sync configuration and Sandbox Policy
+            node.sync_configuration()
+
+            # 3. Background: Start health reporting (Heartbeats)
+            node.start_health_reporting()
 
-    # 4. Background: Periodic auto-update checks
-    if AUTO_UPDATE:
-        updater.start_background_updater()
-
-    # 5. Foreground: Run Persistent Task Stream
-    node.run_task_stream()
+            # 4. Foreground: Run Persistent Task Stream (Indefinite wait)
+            node.run_task_stream()
+
+        except Exception as e:
+            print(f"[!] Main Agent process crashed: {e}. Retrying boot in 10s...", flush=True)
+            import traceback
+            traceback.print_exc()
+
+            # Stop the crashed node before booting a new one; otherwise whatever
+            # start_health_reporting() launched would keep running next to the replacement.
+            if node is not None:
+                try:
+                    node.stop()
+                except Exception as stop_err:
+                    print(f"[!] Failed to stop crashed node: {stop_err}", flush=True)
+                node = None
+            time.sleep(10)
 
 if __name__ == '__main__':
     main()