diff --git a/docs/architecture/workspace_mirroring_design.md b/docs/architecture/workspace_mirroring_design.md index 788d53c..a888fc3 100644 --- a/docs/architecture/workspace_mirroring_design.md +++ b/docs/architecture/workspace_mirroring_design.md @@ -60,6 +60,14 @@ - [x] Multi-node propagation (Server broadcasts Lead Node updates to Support Nodes). ### Phase 3: Conflict Handling & Optimization -- [ ] Implementation of `.cortexignore` filtering. -- [ ] Workspace state locking and concurrency guards. -- [ ] Verification of Content-Addressable Storage (CAS) to optimize binary transfers. +- [x] Implementation of `.cortexignore` and `.gitignore` filtering. +- [x] Workspace state locking and concurrency guards. +- [x] Dynamic reloading of ignore rules on change. + +### Phase 4: Browser & Skill Integration +- [x] Integrate browser download handling into mirrored workspace. +- [x] Implement dynamic session mounting for agent skills (CWD sharing). + +### Phase 5: Distributed Conflict Resolution & Resiliency +- [x] Implement incremental hash-based drift detection. +- [x] Finalize automatic drift recovery logic for reconnection. diff --git a/poc-grpc-agent/agent_node/core/sync.py b/poc-grpc-agent/agent_node/core/sync.py index 16f70cf..887c284 100644 --- a/poc-grpc-agent/agent_node/core/sync.py +++ b/poc-grpc-agent/agent_node/core/sync.py @@ -16,14 +16,31 @@ os.makedirs(path, exist_ok=True) return path - def handle_manifest(self, session_id: str, manifest: agent_pb2.DirectoryManifest): - """Prepares the local directory based on the server manifest.""" + def handle_manifest(self, session_id: str, manifest: agent_pb2.DirectoryManifest) -> list: + """Compares local files with the server manifest and returns paths needing update.""" session_dir = self.get_session_dir(session_id) - print(f"[📁] Preparing Sync Directory: {session_dir}") - # In Phase 1, we just ensure directories exist + print(f"[📁] Reconciling Sync Directory: {session_dir}") + + needs_update = [] for file_info in manifest.files: + target_path = os.path.join(session_dir, file_info.path) + if file_info.is_dir: - os.makedirs(os.path.join(session_dir, file_info.path), exist_ok=True) + os.makedirs(target_path, exist_ok=True) + continue + + # File Check + if not os.path.exists(target_path): + needs_update.append(file_info.path) + else: + # Hash comparison + with open(target_path, "rb") as f: + actual_hash = hashlib.sha256(f.read()).hexdigest() + if actual_hash != file_info.hash: + print(f" [⚠️] Drift Detected: {file_info.path} (Local: {actual_hash[:8]} vs Remote: {file_info.hash[:8]})") + needs_update.append(file_info.path) + + return needs_update def write_chunk(self, session_id: str, payload: agent_pb2.FilePayload) -> bool: """Writes a file chunk to the local session directory.""" diff --git a/poc-grpc-agent/agent_node/node.py b/poc-grpc-agent/agent_node/node.py index c92d7a5..1fb873f 100644 --- a/poc-grpc-agent/agent_node/node.py +++ b/poc-grpc-agent/agent_node/node.py @@ -129,7 +129,26 @@ """Processes inbound file synchronization messages from the Orchestrator.""" sid = fs.session_id if fs.HasField("manifest"): - self.sync_mgr.handle_manifest(sid, fs.manifest) + needs_update = self.sync_mgr.handle_manifest(sid, fs.manifest) + if needs_update: + print(f" [📁⚠️] Drift Detected for {sid}: {len(needs_update)} files need sync") + self.task_queue.put(agent_pb2.ClientTaskMessage( + file_sync=agent_pb2.FileSyncMessage( + session_id=sid, + status=agent_pb2.SyncStatus( + code=agent_pb2.SyncStatus.RECONCILE_REQUIRED, + message=f"Drift detected in {len(needs_update)} files", + reconcile_paths=needs_update + ) + ) + )) + else: + self.task_queue.put(agent_pb2.ClientTaskMessage( + file_sync=agent_pb2.FileSyncMessage( + session_id=sid, + status=agent_pb2.SyncStatus(code=agent_pb2.SyncStatus.OK, message="Synchronized") + ) + )) elif fs.HasField("file_data"): success = self.sync_mgr.write_chunk(sid, fs.file_data) if fs.file_data.is_final: @@ -156,6 +175,11 @@ elif ctrl.action == agent_pb2.SyncControl.REFRESH_MANIFEST: # Node -> Server Manifest Push self._push_full_manifest(sid, ctrl.path) + elif ctrl.action == agent_pb2.SyncControl.RESYNC: + # Server -> Node asks for a check, but Node only has its own manifest? + # Actually RESYNC usually comes with a manifest or implies "send me yours so I can check" + # Here we'll treat RESYNC as "Send me your manifest" + self._push_full_manifest(sid, ctrl.path) def _push_full_manifest(self, session_id, rel_path="."): """Pushes the current local manifest back to the server.""" diff --git a/poc-grpc-agent/orchestrator/app.py b/poc-grpc-agent/orchestrator/app.py index dfd5fda..d8ecd77 100644 --- a/poc-grpc-agent/orchestrator/app.py +++ b/poc-grpc-agent/orchestrator/app.py @@ -62,6 +62,8 @@ print("\n[🧠] AI Phase: Ghost Mirror Workspace Sync (Multi-Node Broadcast)...") for node_id in active_nodes: orch.assistant.push_workspace(node_id, "test-session-001") + # Ensure Phase 5 recovery tracking works + orch.assistant.push_workspace(node_id, "recovery-session") time.sleep(2) # Start watching only on the first node to test broadcast to others @@ -88,7 +90,13 @@ print(f" Browser Result: {res_browser}", flush=True) # Stay alive for diagnostics - time.sleep(60) + time.sleep(55) + + # Phase 5: Distributed Drift Recovery + print("\n[🧠] AI Phase 5: Re-triggering Sync for Drift Recovery (Phase 5)...") + orch.assistant.push_workspace(target_node, "test-session-001") + + time.sleep(10) # Phase 4 Pro: Perception & Evaluation print("\n[🧠] AI Phase 4 Pro: Perception & Advanced Logic...") a11y_action = agent_pb2.BrowserAction( diff --git a/poc-grpc-agent/orchestrator/services/assistant.py b/poc-grpc-agent/orchestrator/services/assistant.py index 26bad63..245ada6 100644 --- a/poc-grpc-agent/orchestrator/services/assistant.py +++ b/poc-grpc-agent/orchestrator/services/assistant.py @@ -1,6 +1,7 @@ import time import json import os +import hashlib from orchestrator.utils.crypto import sign_payload, sign_browser_action from protos import agent_pb2 @@ -11,6 +12,7 @@ self.journal = journal self.pool = pool self.mirror = mirror + self.memberships = {} # session_id -> list(node_id) def push_workspace(self, node_id, session_id): """Initial unidirectional push from server ghost mirror to a node.""" @@ -18,6 +20,13 @@ if not node or not self.mirror: return print(f"[📁📤] Initiating Workspace Push for Session {session_id} to {node_id}") + + # Track for recovery + if session_id not in self.memberships: + self.memberships[session_id] = [] + if node_id not in self.memberships[session_id]: + self.memberships[session_id].append(node_id) + manifest = self.mirror.generate_manifest(session_id) # 1. Send Manifest @@ -31,31 +40,40 @@ # 2. Send File Data for file_info in manifest.files: if not file_info.is_dir: - self._push_file(node, session_id, file_info) + self.push_file(node_id, session_id, file_info.path) - def _push_file(self, node, session_id, file_info): - """Chunks and sends a single file to a node.""" - full_path = os.path.join(self.mirror.get_workspace_path(session_id), file_info.path) - chunk_size = 1024 * 64 # 64KB chunks + def push_file(self, node_id, session_id, rel_path): + """Pushes a specific file to a node (used for drift recovery).""" + node = self.registry.get_node(node_id) + if not node: return - with open(full_path, "rb") as f: + workspace = self.mirror.get_workspace_path(session_id) + abs_path = os.path.join(workspace, rel_path) + + if not os.path.exists(abs_path): + print(f" [📁❓] Requested file {rel_path} not found in mirror") + return + + with open(abs_path, "rb") as f: + full_data = f.read() + full_hash = hashlib.sha256(full_data).hexdigest() + f.seek(0) + index = 0 while True: - chunk = f.read(chunk_size) - is_final = len(chunk) < chunk_size - - payload = agent_pb2.FilePayload( - path=file_info.path, - chunk=chunk, - chunk_index=index, - is_final=is_final, - hash=file_info.hash if is_final else "" - ) + chunk = f.read(1024 * 1024) # 1MB chunks + is_final = len(chunk) < 1024 * 1024 node["queue"].put(agent_pb2.ServerTaskMessage( file_sync=agent_pb2.FileSyncMessage( session_id=session_id, - file_data=payload + file_data=agent_pb2.FilePayload( + path=rel_path, + chunk=chunk, + chunk_index=index, + is_final=is_final, + hash=full_hash if is_final else "" + ) ) )) @@ -63,6 +81,14 @@ break index += 1 + def reconcile_node(self, node_id): + """Forces a re-sync check for all sessions this node belongs to.""" + print(f" [📁🔄] Triggering Resync Check for {node_id}...") + for sid, nodes in self.memberships.items(): + if node_id in nodes: + # Re-push manifest to trigger node-side drift check + self.push_workspace(node_id, sid) + def broadcast_file_chunk(self, session_id: str, sender_node_id: str, file_payload): """Broadcasts a file chunk received from one node to all other nodes in the mesh.""" print(f" [📁📢] Broadcasting {file_payload.path} from {sender_node_id} to other nodes...") diff --git a/poc-grpc-agent/orchestrator/services/grpc_server.py b/poc-grpc-agent/orchestrator/services/grpc_server.py index f650120..923684e 100644 --- a/poc-grpc-agent/orchestrator/services/grpc_server.py +++ b/poc-grpc-agent/orchestrator/services/grpc_server.py @@ -83,6 +83,9 @@ return print(f"[📶] Stream Online for {node_id}") + + # Phase 5: Automatic Reconciliation on Reconnect + self.assistant.reconcile_node(node_id) # 2. Results Listener (Read Thread) def _read_results(): @@ -165,6 +168,10 @@ self.assistant.broadcast_file_chunk(fs.session_id, node_id, fs.file_data) elif fs.HasField("status"): print(f" [📁] Sync Status from {node_id}: {fs.status.message}") + if fs.status.code == agent_pb2.SyncStatus.RECONCILE_REQUIRED: + print(f" [📁🔄] Server triggering recovery sync for {len(fs.status.reconcile_paths)} files to {node_id}") + for path in fs.status.reconcile_paths: + self.assistant.push_file(node_id, fs.session_id, path) def ReportHealth(self, request_iterator, context): """Collect Health Metrics and Feed Policy Updates.""" diff --git a/poc-grpc-agent/phase3_test_v2.txt b/poc-grpc-agent/phase3_test_v2.txt deleted file mode 100644 index 8d6c508..0000000 --- a/poc-grpc-agent/phase3_test_v2.txt +++ /dev/null @@ -1 +0,0 @@ -python3: can't open file '/app/poc-grpc-agent/poc-grpc-agent/test_mesh.py': [Errno 2] No such file or directory diff --git a/poc-grpc-agent/protos/agent.proto b/poc-grpc-agent/protos/agent.proto index 4ab3d8c..5e3932d 100644 --- a/poc-grpc-agent/protos/agent.proto +++ b/poc-grpc-agent/protos/agent.proto @@ -207,6 +207,7 @@ LOCK = 2; // Server -> Node: Disable user-side edits UNLOCK = 3; // Server -> Node: Enable user-side edits REFRESH_MANIFEST = 4; // Server -> Node: Request a full manifest from node + RESYNC = 5; // Server -> Node: Force a hash-based reconciliation } Action action = 1; string path = 2; @@ -241,4 +242,5 @@ } Code code = 1; string message = 2; + repeated string reconcile_paths = 3; // NEW: Files needing immediate re-sync } diff --git a/poc-grpc-agent/protos/agent_pb2.py b/poc-grpc-agent/protos/agent_pb2.py index 35cea2a..3472d01 100644 --- a/poc-grpc-agent/protos/agent_pb2.py +++ b/poc-grpc-agent/protos/agent_pb2.py @@ -14,7 +14,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x12protos/agent.proto\x12\x05\x61gent\"\xde\x01\n\x13RegistrationRequest\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x12\n\nauth_token\x18\x03 \x01(\t\x12\x18\n\x10node_description\x18\x04 \x01(\t\x12\x42\n\x0c\x63\x61pabilities\x18\x05 \x03(\x0b\x32,.agent.RegistrationRequest.CapabilitiesEntry\x1a\x33\n\x11\x43\x61pabilitiesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xc5\x01\n\rSandboxPolicy\x12\'\n\x04mode\x18\x01 \x01(\x0e\x32\x19.agent.SandboxPolicy.Mode\x12\x18\n\x10\x61llowed_commands\x18\x02 \x03(\t\x12\x17\n\x0f\x64\x65nied_commands\x18\x03 \x03(\t\x12\x1a\n\x12sensitive_commands\x18\x04 \x03(\t\x12\x18\n\x10working_dir_jail\x18\x05 \x01(\t\"\"\n\x04Mode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"x\n\x14RegistrationResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x15\n\rerror_message\x18\x02 \x01(\t\x12\x12\n\nsession_id\x18\x03 \x01(\t\x12$\n\x06policy\x18\x04 \x01(\x0b\x32\x14.agent.SandboxPolicy\"\xff\x01\n\x11\x43lientTaskMessage\x12,\n\rtask_response\x18\x01 \x01(\x0b\x32\x13.agent.TaskResponseH\x00\x12-\n\ntask_claim\x18\x02 \x01(\x0b\x32\x17.agent.TaskClaimRequestH\x00\x12,\n\rbrowser_event\x18\x03 \x01(\x0b\x32\x13.agent.BrowserEventH\x00\x12\'\n\x08\x61nnounce\x18\x04 \x01(\x0b\x32\x13.agent.NodeAnnounceH\x00\x12+\n\tfile_sync\x18\x05 \x01(\x0b\x32\x16.agent.FileSyncMessageH\x00\x42\t\n\x07payload\"\x1f\n\x0cNodeAnnounce\x12\x0f\n\x07node_id\x18\x01 \x01(\t\"\x87\x01\n\x0c\x42rowserEvent\x12\x12\n\nsession_id\x18\x01 \x01(\t\x12,\n\x0b\x63onsole_msg\x18\x02 \x01(\x0b\x32\x15.agent.ConsoleMessageH\x00\x12,\n\x0bnetwork_req\x18\x03 \x01(\x0b\x32\x15.agent.NetworkRequestH\x00\x42\x07\n\x05\x65vent\"\x8d\x02\n\x11ServerTaskMessage\x12*\n\x0ctask_request\x18\x01 \x01(\x0b\x32\x12.agent.TaskRequestH\x00\x12\x31\n\x10work_pool_update\x18\x02 \x01(\x0b\x32\x15.agent.WorkPoolUpdateH\x00\x12\x30\n\x0c\x63laim_status\x18\x03 \x01(\x0b\x32\x18.agent.TaskClaimResponseH\x00\x12/\n\x0btask_cancel\x18\x04 \x01(\x0b\x32\x18.agent.TaskCancelRequestH\x00\x12+\n\tfile_sync\x18\x05 \x01(\x0b\x32\x16.agent.FileSyncMessageH\x00\x42\t\n\x07payload\"$\n\x11TaskCancelRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\"\xd1\x01\n\x0bTaskRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x11\n\ttask_type\x18\x02 \x01(\t\x12\x16\n\x0cpayload_json\x18\x03 \x01(\tH\x00\x12.\n\x0e\x62rowser_action\x18\x07 \x01(\x0b\x32\x14.agent.BrowserActionH\x00\x12\x12\n\ntimeout_ms\x18\x04 \x01(\x05\x12\x10\n\x08trace_id\x18\x05 \x01(\t\x12\x11\n\tsignature\x18\x06 \x01(\t\x12\x12\n\nsession_id\x18\x08 \x01(\tB\t\n\x07payload\"\xa0\x02\n\rBrowserAction\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.agent.BrowserAction.ActionType\x12\x0b\n\x03url\x18\x02 \x01(\t\x12\x10\n\x08selector\x18\x03 \x01(\t\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x12\n\nsession_id\x18\x05 \x01(\t\x12\t\n\x01x\x18\x06 \x01(\x05\x12\t\n\x01y\x18\x07 \x01(\x05\"\x86\x01\n\nActionType\x12\x0c\n\x08NAVIGATE\x10\x00\x12\t\n\x05\x43LICK\x10\x01\x12\x08\n\x04TYPE\x10\x02\x12\x0e\n\nSCREENSHOT\x10\x03\x12\x0b\n\x07GET_DOM\x10\x04\x12\t\n\x05HOVER\x10\x05\x12\n\n\x06SCROLL\x10\x06\x12\t\n\x05\x43LOSE\x10\x07\x12\x08\n\x04\x45VAL\x10\x08\x12\x0c\n\x08GET_A11Y\x10\t\"\xe0\x02\n\x0cTaskResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12*\n\x06status\x18\x02 \x01(\x0e\x32\x1a.agent.TaskResponse.Status\x12\x0e\n\x06stdout\x18\x03 \x01(\t\x12\x0e\n\x06stderr\x18\x04 \x01(\t\x12\x10\n\x08trace_id\x18\x05 \x01(\t\x12\x35\n\tartifacts\x18\x06 \x03(\x0b\x32\".agent.TaskResponse.ArtifactsEntry\x12\x30\n\x0e\x62rowser_result\x18\x07 \x01(\x0b\x32\x16.agent.BrowserResponseH\x00\x1a\x30\n\x0e\x41rtifactsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\"<\n\x06Status\x12\x0b\n\x07SUCCESS\x10\x00\x12\t\n\x05\x45RROR\x10\x01\x12\x0b\n\x07TIMEOUT\x10\x02\x12\r\n\tCANCELLED\x10\x03\x42\x08\n\x06result\"\xdc\x01\n\x0f\x42rowserResponse\x12\x0b\n\x03url\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\x10\n\x08snapshot\x18\x03 \x01(\x0c\x12\x13\n\x0b\x64om_content\x18\x04 \x01(\t\x12\x11\n\ta11y_tree\x18\x05 \x01(\t\x12\x13\n\x0b\x65val_result\x18\x06 \x01(\t\x12.\n\x0f\x63onsole_history\x18\x07 \x03(\x0b\x32\x15.agent.ConsoleMessage\x12.\n\x0fnetwork_history\x18\x08 \x03(\x0b\x32\x15.agent.NetworkRequest\"C\n\x0e\x43onsoleMessage\x12\r\n\x05level\x18\x01 \x01(\t\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x14\n\x0ctimestamp_ms\x18\x03 \x01(\x03\"h\n\x0eNetworkRequest\x12\x0e\n\x06method\x18\x01 \x01(\t\x12\x0b\n\x03url\x18\x02 \x01(\t\x12\x0e\n\x06status\x18\x03 \x01(\x05\x12\x15\n\rresource_type\x18\x04 \x01(\t\x12\x12\n\nlatency_ms\x18\x05 \x01(\x03\",\n\x0eWorkPoolUpdate\x12\x1a\n\x12\x61vailable_task_ids\x18\x01 \x03(\t\"4\n\x10TaskClaimRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\t\"E\n\x11TaskClaimResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x0f\n\x07granted\x18\x02 \x01(\x08\x12\x0e\n\x06reason\x18\x03 \x01(\t\"\xc1\x01\n\tHeartbeat\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\x19\n\x11\x63pu_usage_percent\x18\x02 \x01(\x02\x12\x1c\n\x14memory_usage_percent\x18\x03 \x01(\x02\x12\x1b\n\x13\x61\x63tive_worker_count\x18\x04 \x01(\x05\x12\x1b\n\x13max_worker_capacity\x18\x05 \x01(\x05\x12\x16\n\x0estatus_message\x18\x06 \x01(\t\x12\x18\n\x10running_task_ids\x18\x07 \x03(\t\"-\n\x13HealthCheckResponse\x12\x16\n\x0eserver_time_ms\x18\x01 \x01(\x03\"\xd3\x01\n\x0f\x46ileSyncMessage\x12\x12\n\nsession_id\x18\x01 \x01(\t\x12,\n\x08manifest\x18\x02 \x01(\x0b\x32\x18.agent.DirectoryManifestH\x00\x12\'\n\tfile_data\x18\x03 \x01(\x0b\x32\x12.agent.FilePayloadH\x00\x12#\n\x06status\x18\x04 \x01(\x0b\x32\x11.agent.SyncStatusH\x00\x12%\n\x07\x63ontrol\x18\x05 \x01(\x0b\x32\x12.agent.SyncControlH\x00\x42\t\n\x07payload\"\xa3\x01\n\x0bSyncControl\x12)\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x19.agent.SyncControl.Action\x12\x0c\n\x04path\x18\x02 \x01(\t\"[\n\x06\x41\x63tion\x12\x12\n\x0eSTART_WATCHING\x10\x00\x12\x11\n\rSTOP_WATCHING\x10\x01\x12\x08\n\x04LOCK\x10\x02\x12\n\n\x06UNLOCK\x10\x03\x12\x14\n\x10REFRESH_MANIFEST\x10\x04\"F\n\x11\x44irectoryManifest\x12\x11\n\troot_path\x18\x01 \x01(\t\x12\x1e\n\x05\x66iles\x18\x02 \x03(\x0b\x32\x0f.agent.FileInfo\"D\n\x08\x46ileInfo\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04hash\x18\x03 \x01(\t\x12\x0e\n\x06is_dir\x18\x04 \x01(\x08\"_\n\x0b\x46ilePayload\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\r\n\x05\x63hunk\x18\x02 \x01(\x0c\x12\x13\n\x0b\x63hunk_index\x18\x03 \x01(\x05\x12\x10\n\x08is_final\x18\x04 \x01(\x08\x12\x0c\n\x04hash\x18\x05 \x01(\t\"\x87\x01\n\nSyncStatus\x12$\n\x04\x63ode\x18\x01 \x01(\x0e\x32\x16.agent.SyncStatus.Code\x12\x0f\n\x07message\x18\x02 \x01(\t\"B\n\x04\x43ode\x12\x06\n\x02OK\x10\x00\x12\t\n\x05\x45RROR\x10\x01\x12\x16\n\x12RECONCILE_REQUIRED\x10\x02\x12\x0f\n\x0bIN_PROGRESS\x10\x03\x32\xe9\x01\n\x11\x41gentOrchestrator\x12L\n\x11SyncConfiguration\x12\x1a.agent.RegistrationRequest\x1a\x1b.agent.RegistrationResponse\x12\x44\n\nTaskStream\x12\x18.agent.ClientTaskMessage\x1a\x18.agent.ServerTaskMessage(\x01\x30\x01\x12@\n\x0cReportHealth\x12\x10.agent.Heartbeat\x1a\x1a.agent.HealthCheckResponse(\x01\x30\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x12protos/agent.proto\x12\x05\x61gent\"\xde\x01\n\x13RegistrationRequest\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x12\n\nauth_token\x18\x03 \x01(\t\x12\x18\n\x10node_description\x18\x04 \x01(\t\x12\x42\n\x0c\x63\x61pabilities\x18\x05 \x03(\x0b\x32,.agent.RegistrationRequest.CapabilitiesEntry\x1a\x33\n\x11\x43\x61pabilitiesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xc5\x01\n\rSandboxPolicy\x12\'\n\x04mode\x18\x01 \x01(\x0e\x32\x19.agent.SandboxPolicy.Mode\x12\x18\n\x10\x61llowed_commands\x18\x02 \x03(\t\x12\x17\n\x0f\x64\x65nied_commands\x18\x03 \x03(\t\x12\x1a\n\x12sensitive_commands\x18\x04 \x03(\t\x12\x18\n\x10working_dir_jail\x18\x05 \x01(\t\"\"\n\x04Mode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"x\n\x14RegistrationResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x15\n\rerror_message\x18\x02 \x01(\t\x12\x12\n\nsession_id\x18\x03 \x01(\t\x12$\n\x06policy\x18\x04 \x01(\x0b\x32\x14.agent.SandboxPolicy\"\xff\x01\n\x11\x43lientTaskMessage\x12,\n\rtask_response\x18\x01 \x01(\x0b\x32\x13.agent.TaskResponseH\x00\x12-\n\ntask_claim\x18\x02 \x01(\x0b\x32\x17.agent.TaskClaimRequestH\x00\x12,\n\rbrowser_event\x18\x03 \x01(\x0b\x32\x13.agent.BrowserEventH\x00\x12\'\n\x08\x61nnounce\x18\x04 \x01(\x0b\x32\x13.agent.NodeAnnounceH\x00\x12+\n\tfile_sync\x18\x05 \x01(\x0b\x32\x16.agent.FileSyncMessageH\x00\x42\t\n\x07payload\"\x1f\n\x0cNodeAnnounce\x12\x0f\n\x07node_id\x18\x01 \x01(\t\"\x87\x01\n\x0c\x42rowserEvent\x12\x12\n\nsession_id\x18\x01 \x01(\t\x12,\n\x0b\x63onsole_msg\x18\x02 \x01(\x0b\x32\x15.agent.ConsoleMessageH\x00\x12,\n\x0bnetwork_req\x18\x03 \x01(\x0b\x32\x15.agent.NetworkRequestH\x00\x42\x07\n\x05\x65vent\"\x8d\x02\n\x11ServerTaskMessage\x12*\n\x0ctask_request\x18\x01 \x01(\x0b\x32\x12.agent.TaskRequestH\x00\x12\x31\n\x10work_pool_update\x18\x02 \x01(\x0b\x32\x15.agent.WorkPoolUpdateH\x00\x12\x30\n\x0c\x63laim_status\x18\x03 \x01(\x0b\x32\x18.agent.TaskClaimResponseH\x00\x12/\n\x0btask_cancel\x18\x04 \x01(\x0b\x32\x18.agent.TaskCancelRequestH\x00\x12+\n\tfile_sync\x18\x05 \x01(\x0b\x32\x16.agent.FileSyncMessageH\x00\x42\t\n\x07payload\"$\n\x11TaskCancelRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\"\xd1\x01\n\x0bTaskRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x11\n\ttask_type\x18\x02 \x01(\t\x12\x16\n\x0cpayload_json\x18\x03 \x01(\tH\x00\x12.\n\x0e\x62rowser_action\x18\x07 \x01(\x0b\x32\x14.agent.BrowserActionH\x00\x12\x12\n\ntimeout_ms\x18\x04 \x01(\x05\x12\x10\n\x08trace_id\x18\x05 \x01(\t\x12\x11\n\tsignature\x18\x06 \x01(\t\x12\x12\n\nsession_id\x18\x08 \x01(\tB\t\n\x07payload\"\xa0\x02\n\rBrowserAction\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.agent.BrowserAction.ActionType\x12\x0b\n\x03url\x18\x02 \x01(\t\x12\x10\n\x08selector\x18\x03 \x01(\t\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x12\n\nsession_id\x18\x05 \x01(\t\x12\t\n\x01x\x18\x06 \x01(\x05\x12\t\n\x01y\x18\x07 \x01(\x05\"\x86\x01\n\nActionType\x12\x0c\n\x08NAVIGATE\x10\x00\x12\t\n\x05\x43LICK\x10\x01\x12\x08\n\x04TYPE\x10\x02\x12\x0e\n\nSCREENSHOT\x10\x03\x12\x0b\n\x07GET_DOM\x10\x04\x12\t\n\x05HOVER\x10\x05\x12\n\n\x06SCROLL\x10\x06\x12\t\n\x05\x43LOSE\x10\x07\x12\x08\n\x04\x45VAL\x10\x08\x12\x0c\n\x08GET_A11Y\x10\t\"\xe0\x02\n\x0cTaskResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12*\n\x06status\x18\x02 \x01(\x0e\x32\x1a.agent.TaskResponse.Status\x12\x0e\n\x06stdout\x18\x03 \x01(\t\x12\x0e\n\x06stderr\x18\x04 \x01(\t\x12\x10\n\x08trace_id\x18\x05 \x01(\t\x12\x35\n\tartifacts\x18\x06 \x03(\x0b\x32\".agent.TaskResponse.ArtifactsEntry\x12\x30\n\x0e\x62rowser_result\x18\x07 \x01(\x0b\x32\x16.agent.BrowserResponseH\x00\x1a\x30\n\x0e\x41rtifactsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\"<\n\x06Status\x12\x0b\n\x07SUCCESS\x10\x00\x12\t\n\x05\x45RROR\x10\x01\x12\x0b\n\x07TIMEOUT\x10\x02\x12\r\n\tCANCELLED\x10\x03\x42\x08\n\x06result\"\xdc\x01\n\x0f\x42rowserResponse\x12\x0b\n\x03url\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\x10\n\x08snapshot\x18\x03 \x01(\x0c\x12\x13\n\x0b\x64om_content\x18\x04 \x01(\t\x12\x11\n\ta11y_tree\x18\x05 \x01(\t\x12\x13\n\x0b\x65val_result\x18\x06 \x01(\t\x12.\n\x0f\x63onsole_history\x18\x07 \x03(\x0b\x32\x15.agent.ConsoleMessage\x12.\n\x0fnetwork_history\x18\x08 \x03(\x0b\x32\x15.agent.NetworkRequest\"C\n\x0e\x43onsoleMessage\x12\r\n\x05level\x18\x01 \x01(\t\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x14\n\x0ctimestamp_ms\x18\x03 \x01(\x03\"h\n\x0eNetworkRequest\x12\x0e\n\x06method\x18\x01 \x01(\t\x12\x0b\n\x03url\x18\x02 \x01(\t\x12\x0e\n\x06status\x18\x03 \x01(\x05\x12\x15\n\rresource_type\x18\x04 \x01(\t\x12\x12\n\nlatency_ms\x18\x05 \x01(\x03\",\n\x0eWorkPoolUpdate\x12\x1a\n\x12\x61vailable_task_ids\x18\x01 \x03(\t\"4\n\x10TaskClaimRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\t\"E\n\x11TaskClaimResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x0f\n\x07granted\x18\x02 \x01(\x08\x12\x0e\n\x06reason\x18\x03 \x01(\t\"\xc1\x01\n\tHeartbeat\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\x19\n\x11\x63pu_usage_percent\x18\x02 \x01(\x02\x12\x1c\n\x14memory_usage_percent\x18\x03 \x01(\x02\x12\x1b\n\x13\x61\x63tive_worker_count\x18\x04 \x01(\x05\x12\x1b\n\x13max_worker_capacity\x18\x05 \x01(\x05\x12\x16\n\x0estatus_message\x18\x06 \x01(\t\x12\x18\n\x10running_task_ids\x18\x07 \x03(\t\"-\n\x13HealthCheckResponse\x12\x16\n\x0eserver_time_ms\x18\x01 \x01(\x03\"\xd3\x01\n\x0f\x46ileSyncMessage\x12\x12\n\nsession_id\x18\x01 \x01(\t\x12,\n\x08manifest\x18\x02 \x01(\x0b\x32\x18.agent.DirectoryManifestH\x00\x12\'\n\tfile_data\x18\x03 \x01(\x0b\x32\x12.agent.FilePayloadH\x00\x12#\n\x06status\x18\x04 \x01(\x0b\x32\x11.agent.SyncStatusH\x00\x12%\n\x07\x63ontrol\x18\x05 \x01(\x0b\x32\x12.agent.SyncControlH\x00\x42\t\n\x07payload\"\xaf\x01\n\x0bSyncControl\x12)\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x19.agent.SyncControl.Action\x12\x0c\n\x04path\x18\x02 \x01(\t\"g\n\x06\x41\x63tion\x12\x12\n\x0eSTART_WATCHING\x10\x00\x12\x11\n\rSTOP_WATCHING\x10\x01\x12\x08\n\x04LOCK\x10\x02\x12\n\n\x06UNLOCK\x10\x03\x12\x14\n\x10REFRESH_MANIFEST\x10\x04\x12\n\n\x06RESYNC\x10\x05\"F\n\x11\x44irectoryManifest\x12\x11\n\troot_path\x18\x01 \x01(\t\x12\x1e\n\x05\x66iles\x18\x02 \x03(\x0b\x32\x0f.agent.FileInfo\"D\n\x08\x46ileInfo\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04hash\x18\x03 \x01(\t\x12\x0e\n\x06is_dir\x18\x04 \x01(\x08\"_\n\x0b\x46ilePayload\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\r\n\x05\x63hunk\x18\x02 \x01(\x0c\x12\x13\n\x0b\x63hunk_index\x18\x03 \x01(\x05\x12\x10\n\x08is_final\x18\x04 \x01(\x08\x12\x0c\n\x04hash\x18\x05 \x01(\t\"\xa0\x01\n\nSyncStatus\x12$\n\x04\x63ode\x18\x01 \x01(\x0e\x32\x16.agent.SyncStatus.Code\x12\x0f\n\x07message\x18\x02 \x01(\t\x12\x17\n\x0freconcile_paths\x18\x03 \x03(\t\"B\n\x04\x43ode\x12\x06\n\x02OK\x10\x00\x12\t\n\x05\x45RROR\x10\x01\x12\x16\n\x12RECONCILE_REQUIRED\x10\x02\x12\x0f\n\x0bIN_PROGRESS\x10\x03\x32\xe9\x01\n\x11\x41gentOrchestrator\x12L\n\x11SyncConfiguration\x12\x1a.agent.RegistrationRequest\x1a\x1b.agent.RegistrationResponse\x12\x44\n\nTaskStream\x12\x18.agent.ClientTaskMessage\x1a\x18.agent.ServerTaskMessage(\x01\x30\x01\x12@\n\x0cReportHealth\x12\x10.agent.Heartbeat\x1a\x1a.agent.HealthCheckResponse(\x01\x30\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -76,19 +76,19 @@ _globals['_FILESYNCMESSAGE']._serialized_start=2986 _globals['_FILESYNCMESSAGE']._serialized_end=3197 _globals['_SYNCCONTROL']._serialized_start=3200 - _globals['_SYNCCONTROL']._serialized_end=3363 + _globals['_SYNCCONTROL']._serialized_end=3375 _globals['_SYNCCONTROL_ACTION']._serialized_start=3272 - _globals['_SYNCCONTROL_ACTION']._serialized_end=3363 - _globals['_DIRECTORYMANIFEST']._serialized_start=3365 - _globals['_DIRECTORYMANIFEST']._serialized_end=3435 - _globals['_FILEINFO']._serialized_start=3437 - _globals['_FILEINFO']._serialized_end=3505 - _globals['_FILEPAYLOAD']._serialized_start=3507 - _globals['_FILEPAYLOAD']._serialized_end=3602 - _globals['_SYNCSTATUS']._serialized_start=3605 - _globals['_SYNCSTATUS']._serialized_end=3740 - _globals['_SYNCSTATUS_CODE']._serialized_start=3674 - _globals['_SYNCSTATUS_CODE']._serialized_end=3740 - _globals['_AGENTORCHESTRATOR']._serialized_start=3743 - _globals['_AGENTORCHESTRATOR']._serialized_end=3976 + _globals['_SYNCCONTROL_ACTION']._serialized_end=3375 + _globals['_DIRECTORYMANIFEST']._serialized_start=3377 + _globals['_DIRECTORYMANIFEST']._serialized_end=3447 + _globals['_FILEINFO']._serialized_start=3449 + _globals['_FILEINFO']._serialized_end=3517 + _globals['_FILEPAYLOAD']._serialized_start=3519 + _globals['_FILEPAYLOAD']._serialized_end=3614 + _globals['_SYNCSTATUS']._serialized_start=3617 + _globals['_SYNCSTATUS']._serialized_end=3777 + _globals['_SYNCSTATUS_CODE']._serialized_start=3711 + _globals['_SYNCSTATUS_CODE']._serialized_end=3777 + _globals['_AGENTORCHESTRATOR']._serialized_start=3780 + _globals['_AGENTORCHESTRATOR']._serialized_end=4013 # @@protoc_insertion_point(module_scope) diff --git a/poc-grpc-agent/test_mesh.py b/poc-grpc-agent/test_mesh.py index 012cf8e..260a82d 100644 --- a/poc-grpc-agent/test_mesh.py +++ b/poc-grpc-agent/test_mesh.py @@ -83,9 +83,14 @@ time.sleep(20) # Wait for Lock reliably # 4. Workspace LOCK Test print(f"\n[🔒] User Sim: Node Alpha should be LOCKED by Orchestrator now...") - locked_file = os.path.join(root_alpha, "hello.py") with open(locked_file, "a") as f: f.write("\n# USER TRYING TO EDIT WHILE LOCKED\n") + + time.sleep(15) + # 5. Drift Recovery Test + print(f"\n[💥] User Sim: Corrupting {sync_file} on Node Alpha (Simulating Sync Drift)...") + with open(sync_file, "w") as f: + f.write("CORRUPTED CONTENT") threading.Thread(target=simulate_local_edit, daemon=True).start() diff --git a/poc-grpc-agent/test_recovery.py b/poc-grpc-agent/test_recovery.py new file mode 100644 index 0000000..8f96bd8 --- /dev/null +++ b/poc-grpc-agent/test_recovery.py @@ -0,0 +1,80 @@ +import time +import subprocess +import os +import shutil + +def run_recovery_test(): + print("[🚀] Starting Ghost Mirror Recovery Test...") + + # 0. Prep Mirror (Server side) + mirror_dir = "/app/data/mirrors/recovery-session" + os.makedirs(mirror_dir, exist_ok=True) + with open(os.path.join(mirror_dir, "app.py"), "w") as f: + f.write("print('v1')") + + # 1. Start Orchestrator + print("[🛡️] Orchestrator: Starting...") + orchestrator = subprocess.Popen( + ["python3", "-m", "orchestrator.app"], + cwd="/app/poc-grpc-agent", + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1 + ) + time.sleep(3) + + # 2. Start Node Alpha + print("[🤖] Node Alpha: Starting...") + node_alpha_sync = "/tmp/cortex-sync-recovery" + if os.path.exists(node_alpha_sync): shutil.rmtree(node_alpha_sync) + + node1 = subprocess.Popen( + ["python3", "-m", "agent_node.main"], + cwd="/app/poc-grpc-agent", + env={**os.environ, "AGENT_NODE_ID": "node-alpha", "CORTEX_SYNC_DIR": node_alpha_sync}, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1 + ) + + time.sleep(10) # Wait for initial sync + print("[✅] Initial Sync should be done.") + + # 3. Stop Node Alpha + print("[🛑] Stopping Node Alpha...") + node1.terminate() + node1.wait() + + # 4. Modify Server Mirror (Simulate updates while node is offline) + print("[📝] Updating Server Mirror to v2...") + with open(os.path.join(mirror_dir, "app.py"), "w") as f: + f.write("print('v2')") + + time.sleep(2) + + # 5. Restart Node Alpha + print("[🤖] Node Alpha: Restarting...") + node1_v2 = subprocess.Popen( + ["python3", "-m", "agent_node.main"], + cwd="/app/poc-grpc-agent", + env={**os.environ, "AGENT_NODE_ID": "node-alpha", "CORTEX_SYNC_DIR": node_alpha_sync}, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1 + ) + + # 6. Wait and Check + time.sleep(15) + print("\n[📊] Checking Results...") + node_file = os.path.join(node_alpha_sync, "recovery-session", "app.py") + if os.path.exists(node_file): + with open(node_file, "r") as f: + content = f.read() + print(f"Node Content: {content}") + if content == "print('v2')": + print("[🏆] RECOVERY SUCCESSFUL!") + else: + print("[❌] RECOVERY FAILED - Content mismatch") + else: + print("[❌] RECOVERY FAILED - File not found") + + orchestrator.terminate() + node1_v2.terminate() + print("[✅] Done.") + +if __name__ == "__main__": + run_recovery_test()