diff --git a/.agent/utils/gitbucket/add_comment.py b/.agent/utils/gitbucket/add_comment.py index 80d1666..8f6ef4e 100644 --- a/.agent/utils/gitbucket/add_comment.py +++ b/.agent/utils/gitbucket/add_comment.py @@ -21,6 +21,8 @@ sys.exit(1) url = f"https://gitbucket.jerxie.com/api/v3/repos/{repo}/issues/{issue_number}/comments" + # Support literal \n passed from shell + body = body.replace("\\n", "\n") data = {"body": body} req = urllib.request.Request(url, json.dumps(data).encode("utf-8"), headers={ diff --git a/.agent/utils/gitbucket/create_issue.py b/.agent/utils/gitbucket/create_issue.py index 6116285..2692f68 100644 --- a/.agent/utils/gitbucket/create_issue.py +++ b/.agent/utils/gitbucket/create_issue.py @@ -21,6 +21,8 @@ sys.exit(1) url = f"https://gitbucket.jerxie.com/api/v3/repos/{repo}/issues" + # Support literal \n passed from shell + body = body.replace("\\n", "\n") data = {"title": title, "body": body} req = urllib.request.Request(url, json.dumps(data).encode("utf-8"), headers={ diff --git a/docs/architecture/cortex_project_todo.md b/docs/architecture/cortex_project_todo.md index 26308ff..926c2fe 100644 --- a/docs/architecture/cortex_project_todo.md +++ b/docs/architecture/cortex_project_todo.md @@ -4,9 +4,18 @@ ## 🚀 High Priority (Infrastructure) -### [ ] Persistent Sub-Worker Bridges (CDP/LSP) - 🟢 CURRENT FOCUS -- **Description**: Implement long-running "Skill Bridges" for Browser automation (CDP) and Code Intelligence (LSP). -- **Goal**: Support the **Antigravity Browser Skill** by maintaining a persistent browser session rather than spawning a new process per task. +### [ ] Persistent Sub-Worker Bridges (CDP/LSP) - ✅ FOUNDATIONS BUILT +- **Status**: Basic Navigation, Screenshotting, and Persistent Session state implemented. +- **Goal**: Support a professional, high-fidelity **Antigravity Browser Skill**. + +#### 🌐 Comprehensive Browser Skill Requirements: +- **[ ] JS Console Tunnel**: Pipe `console.log/error` from the browser back to the server in real-time. +- **[ ] Network Observability**: Capture and return XHR/Fetch traffic (HAR or failed requests only) for AI debugging. +- **[ ] A11y Tree Perception**: Provide the **Accessibility Tree** (JSON) to the AI instead of just raw HTML/DOM for better semantic understanding. +- **[ ] Advanced Interactions**: Support `Hover`, `Scroll`, `Drag & Drop`, and `Multi-key` complex input. +- **[ ] EVAL Skill**: Allow the AI to inject and execute arbitrary JavaScript (`page.evaluate()`) to extract data or trigger events. +- **[ ] Smart Wait Logic**: Implement `wait_for_network_idle`, `wait_for_selector`, and custom predicates to reduce task flakiness. +- **[ ] Artifact Extraction**: Export high-definition **Videos** (chunked) and **HAR** files for audit trails. ### [ ] Multi-Tenancy & Resource Isolation - **Description**: Isolate node groups by user/tenant and enforce hardware quotas. @@ -14,8 +23,13 @@ ### [ ] Binary Artifact & Large Data Handling (Chunking) - **Description**: Implement gRPC stream-based chunking for large artifacts. +- **Specific Case**: Support high-fidelity **Video Recordings** from Browser sessions (multi-GB files). - **Requirement**: **Transparency**. The Main AI should just see a "File" result; reassembly happens at the server layer. +### [ ] Architectural Refinement: Unified Worker Shim +- **Description**: Re-evaluate the "Skill" abstraction. Move towards a model where each task is a specialized worker process that decides its capability (Shell vs Playwright) at startup. +- **Goal**: Simplifies context isolation and reduces manager-thread overhead. + ### [ ] Graceful Shutdown & Local Task Persistence (Built-in) - **Description**: Handle node interrupts (SIGTERM/SIGINT) to allow workers to finish or checkpoint. Store a local `task_history.json` on the node to recover state after crash/restart. diff --git a/poc-grpc-agent/agent_pb2.py b/poc-grpc-agent/agent_pb2.py index 7fbdfc9..a0eacb3 100644 --- a/poc-grpc-agent/agent_pb2.py +++ b/poc-grpc-agent/agent_pb2.py @@ -14,7 +14,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0b\x61gent.proto\x12\x05\x61gent\"\xde\x01\n\x13RegistrationRequest\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x12\n\nauth_token\x18\x03 \x01(\t\x12\x18\n\x10node_description\x18\x04 \x01(\t\x12\x42\n\x0c\x63\x61pabilities\x18\x05 \x03(\x0b\x32,.agent.RegistrationRequest.CapabilitiesEntry\x1a\x33\n\x11\x43\x61pabilitiesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xc5\x01\n\rSandboxPolicy\x12\'\n\x04mode\x18\x01 \x01(\x0e\x32\x19.agent.SandboxPolicy.Mode\x12\x18\n\x10\x61llowed_commands\x18\x02 \x03(\t\x12\x17\n\x0f\x64\x65nied_commands\x18\x03 \x03(\t\x12\x1a\n\x12sensitive_commands\x18\x04 \x03(\t\x12\x18\n\x10working_dir_jail\x18\x05 \x01(\t\"\"\n\x04Mode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"x\n\x14RegistrationResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x15\n\rerror_message\x18\x02 \x01(\t\x12\x12\n\nsession_id\x18\x03 \x01(\t\x12$\n\x06policy\x18\x04 \x01(\x0b\x32\x14.agent.SandboxPolicy\"{\n\x11\x43lientTaskMessage\x12,\n\rtask_response\x18\x01 \x01(\x0b\x32\x13.agent.TaskResponseH\x00\x12-\n\ntask_claim\x18\x02 \x01(\x0b\x32\x17.agent.TaskClaimRequestH\x00\x42\t\n\x07payload\"\xe0\x01\n\x11ServerTaskMessage\x12*\n\x0ctask_request\x18\x01 \x01(\x0b\x32\x12.agent.TaskRequestH\x00\x12\x31\n\x10work_pool_update\x18\x02 \x01(\x0b\x32\x15.agent.WorkPoolUpdateH\x00\x12\x30\n\x0c\x63laim_status\x18\x03 \x01(\x0b\x32\x18.agent.TaskClaimResponseH\x00\x12/\n\x0btask_cancel\x18\x04 \x01(\x0b\x32\x18.agent.TaskCancelRequestH\x00\x42\t\n\x07payload\"$\n\x11TaskCancelRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\"\x80\x01\n\x0bTaskRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x11\n\ttask_type\x18\x02 \x01(\t\x12\x14\n\x0cpayload_json\x18\x03 \x01(\t\x12\x12\n\ntimeout_ms\x18\x04 \x01(\x05\x12\x10\n\x08trace_id\x18\x05 \x01(\t\x12\x11\n\tsignature\x18\x06 \x01(\t\"\xa4\x02\n\x0cTaskResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12*\n\x06status\x18\x02 \x01(\x0e\x32\x1a.agent.TaskResponse.Status\x12\x0e\n\x06stdout\x18\x03 \x01(\t\x12\x0e\n\x06stderr\x18\x04 \x01(\t\x12\x10\n\x08trace_id\x18\x05 \x01(\t\x12\x35\n\tartifacts\x18\x06 \x03(\x0b\x32\".agent.TaskResponse.ArtifactsEntry\x1a\x30\n\x0e\x41rtifactsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\"<\n\x06Status\x12\x0b\n\x07SUCCESS\x10\x00\x12\t\n\x05\x45RROR\x10\x01\x12\x0b\n\x07TIMEOUT\x10\x02\x12\r\n\tCANCELLED\x10\x03\",\n\x0eWorkPoolUpdate\x12\x1a\n\x12\x61vailable_task_ids\x18\x01 \x03(\t\"4\n\x10TaskClaimRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\t\"E\n\x11TaskClaimResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x0f\n\x07granted\x18\x02 \x01(\x08\x12\x0e\n\x06reason\x18\x03 \x01(\t\"\xc1\x01\n\tHeartbeat\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\x19\n\x11\x63pu_usage_percent\x18\x02 \x01(\x02\x12\x1c\n\x14memory_usage_percent\x18\x03 \x01(\x02\x12\x1b\n\x13\x61\x63tive_worker_count\x18\x04 \x01(\x05\x12\x1b\n\x13max_worker_capacity\x18\x05 \x01(\x05\x12\x16\n\x0estatus_message\x18\x06 \x01(\t\x12\x18\n\x10running_task_ids\x18\x07 \x03(\t\"-\n\x13HealthCheckResponse\x12\x16\n\x0eserver_time_ms\x18\x01 \x01(\x03\x32\xe9\x01\n\x11\x41gentOrchestrator\x12L\n\x11SyncConfiguration\x12\x1a.agent.RegistrationRequest\x1a\x1b.agent.RegistrationResponse\x12\x44\n\nTaskStream\x12\x18.agent.ClientTaskMessage\x1a\x18.agent.ServerTaskMessage(\x01\x30\x01\x12@\n\x0cReportHealth\x12\x10.agent.Heartbeat\x1a\x1a.agent.HealthCheckResponse(\x01\x30\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0b\x61gent.proto\x12\x05\x61gent\"\xde\x01\n\x13RegistrationRequest\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x12\n\nauth_token\x18\x03 \x01(\t\x12\x18\n\x10node_description\x18\x04 \x01(\t\x12\x42\n\x0c\x63\x61pabilities\x18\x05 \x03(\x0b\x32,.agent.RegistrationRequest.CapabilitiesEntry\x1a\x33\n\x11\x43\x61pabilitiesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xc5\x01\n\rSandboxPolicy\x12\'\n\x04mode\x18\x01 \x01(\x0e\x32\x19.agent.SandboxPolicy.Mode\x12\x18\n\x10\x61llowed_commands\x18\x02 \x03(\t\x12\x17\n\x0f\x64\x65nied_commands\x18\x03 \x03(\t\x12\x1a\n\x12sensitive_commands\x18\x04 \x03(\t\x12\x18\n\x10working_dir_jail\x18\x05 \x01(\t\"\"\n\x04Mode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"x\n\x14RegistrationResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x15\n\rerror_message\x18\x02 \x01(\t\x12\x12\n\nsession_id\x18\x03 \x01(\t\x12$\n\x06policy\x18\x04 \x01(\x0b\x32\x14.agent.SandboxPolicy\"{\n\x11\x43lientTaskMessage\x12,\n\rtask_response\x18\x01 \x01(\x0b\x32\x13.agent.TaskResponseH\x00\x12-\n\ntask_claim\x18\x02 \x01(\x0b\x32\x17.agent.TaskClaimRequestH\x00\x42\t\n\x07payload\"\xe0\x01\n\x11ServerTaskMessage\x12*\n\x0ctask_request\x18\x01 \x01(\x0b\x32\x12.agent.TaskRequestH\x00\x12\x31\n\x10work_pool_update\x18\x02 \x01(\x0b\x32\x15.agent.WorkPoolUpdateH\x00\x12\x30\n\x0c\x63laim_status\x18\x03 \x01(\x0b\x32\x18.agent.TaskClaimResponseH\x00\x12/\n\x0btask_cancel\x18\x04 \x01(\x0b\x32\x18.agent.TaskCancelRequestH\x00\x42\t\n\x07payload\"$\n\x11TaskCancelRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\"\xbd\x01\n\x0bTaskRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x11\n\ttask_type\x18\x02 \x01(\t\x12\x16\n\x0cpayload_json\x18\x03 \x01(\tH\x00\x12.\n\x0e\x62rowser_action\x18\x07 \x01(\x0b\x32\x14.agent.BrowserActionH\x00\x12\x12\n\ntimeout_ms\x18\x04 \x01(\x05\x12\x10\n\x08trace_id\x18\x05 \x01(\t\x12\x11\n\tsignature\x18\x06 \x01(\tB\t\n\x07payload\"\xda\x01\n\rBrowserAction\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.agent.BrowserAction.ActionType\x12\x0b\n\x03url\x18\x02 \x01(\t\x12\x10\n\x08selector\x18\x03 \x01(\t\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x12\n\nsession_id\x18\x05 \x01(\t\"W\n\nActionType\x12\x0c\n\x08NAVIGATE\x10\x00\x12\t\n\x05\x43LICK\x10\x01\x12\x08\n\x04TYPE\x10\x02\x12\x0e\n\nSCREENSHOT\x10\x03\x12\x0b\n\x07GET_DOM\x10\x04\x12\t\n\x05\x43LOSE\x10\x07\"\xe0\x02\n\x0cTaskResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12*\n\x06status\x18\x02 \x01(\x0e\x32\x1a.agent.TaskResponse.Status\x12\x0e\n\x06stdout\x18\x03 \x01(\t\x12\x0e\n\x06stderr\x18\x04 \x01(\t\x12\x10\n\x08trace_id\x18\x05 \x01(\t\x12\x35\n\tartifacts\x18\x06 \x03(\x0b\x32\".agent.TaskResponse.ArtifactsEntry\x12\x30\n\x0e\x62rowser_result\x18\x07 \x01(\x0b\x32\x16.agent.BrowserResponseH\x00\x1a\x30\n\x0e\x41rtifactsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\"<\n\x06Status\x12\x0b\n\x07SUCCESS\x10\x00\x12\t\n\x05\x45RROR\x10\x01\x12\x0b\n\x07TIMEOUT\x10\x02\x12\r\n\tCANCELLED\x10\x03\x42\x08\n\x06result\"T\n\x0f\x42rowserResponse\x12\x0b\n\x03url\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\x10\n\x08snapshot\x18\x03 \x01(\x0c\x12\x13\n\x0b\x64om_content\x18\x04 \x01(\t\",\n\x0eWorkPoolUpdate\x12\x1a\n\x12\x61vailable_task_ids\x18\x01 \x03(\t\"4\n\x10TaskClaimRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\t\"E\n\x11TaskClaimResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x0f\n\x07granted\x18\x02 \x01(\x08\x12\x0e\n\x06reason\x18\x03 \x01(\t\"\xc1\x01\n\tHeartbeat\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\x19\n\x11\x63pu_usage_percent\x18\x02 \x01(\x02\x12\x1c\n\x14memory_usage_percent\x18\x03 \x01(\x02\x12\x1b\n\x13\x61\x63tive_worker_count\x18\x04 \x01(\x05\x12\x1b\n\x13max_worker_capacity\x18\x05 \x01(\x05\x12\x16\n\x0estatus_message\x18\x06 \x01(\t\x12\x18\n\x10running_task_ids\x18\x07 \x03(\t\"-\n\x13HealthCheckResponse\x12\x16\n\x0eserver_time_ms\x18\x01 \x01(\x03\x32\xe9\x01\n\x11\x41gentOrchestrator\x12L\n\x11SyncConfiguration\x12\x1a.agent.RegistrationRequest\x1a\x1b.agent.RegistrationResponse\x12\x44\n\nTaskStream\x12\x18.agent.ClientTaskMessage\x1a\x18.agent.ServerTaskMessage(\x01\x30\x01\x12@\n\x0cReportHealth\x12\x10.agent.Heartbeat\x1a\x1a.agent.HealthCheckResponse(\x01\x30\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -42,23 +42,29 @@ _globals['_TASKCANCELREQUEST']._serialized_start=921 _globals['_TASKCANCELREQUEST']._serialized_end=957 _globals['_TASKREQUEST']._serialized_start=960 - _globals['_TASKREQUEST']._serialized_end=1088 - _globals['_TASKRESPONSE']._serialized_start=1091 - _globals['_TASKRESPONSE']._serialized_end=1383 - _globals['_TASKRESPONSE_ARTIFACTSENTRY']._serialized_start=1273 - _globals['_TASKRESPONSE_ARTIFACTSENTRY']._serialized_end=1321 - _globals['_TASKRESPONSE_STATUS']._serialized_start=1323 - _globals['_TASKRESPONSE_STATUS']._serialized_end=1383 - _globals['_WORKPOOLUPDATE']._serialized_start=1385 - _globals['_WORKPOOLUPDATE']._serialized_end=1429 - _globals['_TASKCLAIMREQUEST']._serialized_start=1431 - _globals['_TASKCLAIMREQUEST']._serialized_end=1483 - _globals['_TASKCLAIMRESPONSE']._serialized_start=1485 - _globals['_TASKCLAIMRESPONSE']._serialized_end=1554 - _globals['_HEARTBEAT']._serialized_start=1557 - _globals['_HEARTBEAT']._serialized_end=1750 - _globals['_HEALTHCHECKRESPONSE']._serialized_start=1752 - _globals['_HEALTHCHECKRESPONSE']._serialized_end=1797 - _globals['_AGENTORCHESTRATOR']._serialized_start=1800 - _globals['_AGENTORCHESTRATOR']._serialized_end=2033 + _globals['_TASKREQUEST']._serialized_end=1149 + _globals['_BROWSERACTION']._serialized_start=1152 + _globals['_BROWSERACTION']._serialized_end=1370 + _globals['_BROWSERACTION_ACTIONTYPE']._serialized_start=1283 + _globals['_BROWSERACTION_ACTIONTYPE']._serialized_end=1370 + _globals['_TASKRESPONSE']._serialized_start=1373 + _globals['_TASKRESPONSE']._serialized_end=1725 + _globals['_TASKRESPONSE_ARTIFACTSENTRY']._serialized_start=1605 + _globals['_TASKRESPONSE_ARTIFACTSENTRY']._serialized_end=1653 + _globals['_TASKRESPONSE_STATUS']._serialized_start=1655 + _globals['_TASKRESPONSE_STATUS']._serialized_end=1715 + _globals['_BROWSERRESPONSE']._serialized_start=1727 + _globals['_BROWSERRESPONSE']._serialized_end=1811 + _globals['_WORKPOOLUPDATE']._serialized_start=1813 + _globals['_WORKPOOLUPDATE']._serialized_end=1857 + _globals['_TASKCLAIMREQUEST']._serialized_start=1859 + _globals['_TASKCLAIMREQUEST']._serialized_end=1911 + _globals['_TASKCLAIMRESPONSE']._serialized_start=1913 + _globals['_TASKCLAIMRESPONSE']._serialized_end=1982 + _globals['_HEARTBEAT']._serialized_start=1985 + _globals['_HEARTBEAT']._serialized_end=2178 + _globals['_HEALTHCHECKRESPONSE']._serialized_start=2180 + _globals['_HEALTHCHECKRESPONSE']._serialized_end=2225 + _globals['_AGENTORCHESTRATOR']._serialized_start=2228 + _globals['_AGENTORCHESTRATOR']._serialized_end=2461 # @@protoc_insertion_point(module_scope) diff --git a/poc-grpc-agent/client.py b/poc-grpc-agent/client.py index b50b9a0..a4950a8 100644 --- a/poc-grpc-agent/client.py +++ b/poc-grpc-agent/client.py @@ -14,6 +14,7 @@ import sys import platform from concurrent import futures +from playwright.sync_api import sync_playwright SECRET_KEY = "cortex-secret-shared-key" @@ -33,8 +34,7 @@ def execute(self, task, sandbox, on_complete): try: - payload = json.loads(task.payload_json) - cmd = payload.get("command") + cmd = task.payload_json allowed, status_msg = sandbox.verify(cmd) if not allowed: @@ -57,6 +57,72 @@ finally: with self.lock: self.processes.pop(task.task_id, None) +class BrowserSkill(BaseSkill): + """The 'Antigravity Bridge': Persistent Browser Skill.""" + def __init__(self): + self.lock = threading.Lock() + self.pw = None + self.browser = None + self.sessions = {} # session_id -> { "context": Context, "page": Page } + + def _ensure_browser(self): + if not self.pw: + self.pw = sync_playwright().start() + self.browser = self.pw.chromium.launch(headless=True) + + def execute(self, task, sandbox, on_complete): + try: + self._ensure_browser() + action = task.browser_action + sid = action.session_id or "default" + + with self.lock: + if sid not in self.sessions: + context = self.browser.new_context() + page = context.new_page() + self.sessions[sid] = {"context": context, "page": page} + + page = self.sessions[sid]["page"] + + print(f" [🌐] Browser Action: {agent_pb2.BrowserAction.ActionType.Name(action.action)} | Session: {sid}") + + res_data = {"url": page.url, "title": page.title()} + + if action.action == agent_pb2.BrowserAction.NAVIGATE: + page.goto(action.url, wait_until="domcontentloaded") + elif action.action == agent_pb2.BrowserAction.CLICK: + page.click(action.selector) + elif action.action == agent_pb2.BrowserAction.TYPE: + page.fill(action.selector, action.text) + elif action.action == agent_pb2.BrowserAction.SCREENSHOT: + res_data["snapshot"] = page.screenshot() + elif action.action == agent_pb2.BrowserAction.GET_DOM: + res_data["dom_content"] = page.content() + elif action.action == agent_pb2.BrowserAction.CLOSE: + with self.lock: + sess = self.sessions.pop(sid, None) + if sess: sess["context"].close() + + # Refresh metadata after action + res_data["url"] = page.url + res_data["title"] = page.title() + + browser_res = agent_pb2.BrowserResponse( + url=res_data["url"], title=res_data["title"], + snapshot=res_data.get("snapshot", b""), + dom_content=res_data.get("dom_content", "") + ) + on_complete(task.task_id, {"status": 1, "browser_result": browser_res}, task.trace_id) + + except Exception as e: + print(f" [!] Browser Error: {e}") + on_complete(task.task_id, {"stderr": str(e), "status": 2}, task.trace_id) + + def cancel(self, task_id): + # Playwright sync actions are typically blocking in the thread, + # but we can close the contexts if needed. For now, we trust the timeout. + return False + def cancel(self, task_id): with self.lock: p = self.processes.get(task_id) @@ -71,7 +137,10 @@ def __init__(self, max_workers=5): self.executor = futures.ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="skill-worker") self.active_tasks = {} # task_id -> future - self.skills = {"shell": ShellSkill()} + self.skills = { + "shell": ShellSkill(), + "browser": BrowserSkill() + } self.max_workers = max_workers self.lock = threading.Lock() @@ -80,8 +149,11 @@ if len(self.active_tasks) >= self.max_workers: return False, "Node Capacity Reached" - # Determine Skill (Default to shell for now) - skill = self.skills.get("shell") + # Decide Skill + if task.HasField("browser_action") or task.task_type == "browser": + skill = self.skills["browser"] + else: + skill = self.skills["shell"] future = self.executor.submit(skill.execute, task, sandbox, on_complete) self.active_tasks[task.task_id] = future @@ -143,7 +215,8 @@ def sync_configuration(self): print(f"[*] Handshake: {self.node_id}") reg = agent_pb2.RegistrationRequest(node_id=self.node_id, auth_token=self._create_token(), - node_description="Refactored Stateful Node", capabilities={"shell": "v1"}) + node_description="Refactored Stateful Node with Browser Skill", + capabilities={"shell": "v1", "browser": "playwright-1.42"}) res = self.stub.SyncConfiguration(reg) if res.success: self.sandbox.sync(res.policy); print("[OK] Policy Synced.") else: print(f"[!] Rejected: {res.error_message}"); sys.exit(1) @@ -175,16 +248,30 @@ self.task_queue.put(agent_pb2.ClientTaskMessage(task_claim=agent_pb2.TaskClaimRequest(task_id=tid, node_id=self.node_id))) def _handle_task(self, task): - # Sig Verify - sig = hmac.new(SECRET_KEY.encode(), task.payload_json.encode(), hashlib.sha256).hexdigest() - if not hmac.compare_digest(task.signature, sig): return print("[!] Sig Fail") + # Sig Verify logic based on payload type + if task.HasField("browser_action"): + a = task.browser_action + sign_base = f"{a.action}:{a.url}:{a.session_id}".encode() + else: + sign_base = task.payload_json.encode() + + expected_sig = hmac.new(SECRET_KEY.encode(), sign_base, hashlib.sha256).hexdigest() + if not hmac.compare_digest(task.signature, expected_sig): + return print(f"[!] Sig Fail for {task.task_id}") self.skills.submit(task, self.sandbox, self._on_finish) def _on_finish(self, tid, res, trace): print(f"[*] Task {tid} finished.") status = agent_pb2.TaskResponse.SUCCESS if res['status'] == 1 else agent_pb2.TaskResponse.ERROR - tr = agent_pb2.TaskResponse(task_id=tid, status=status, stdout=res.get('stdout',''), stderr=res.get('stderr',''), trace_id=trace) + + tr = agent_pb2.TaskResponse( + task_id=tid, status=status, + stdout=res.get('stdout',''), + stderr=res.get('stderr',''), + trace_id=trace, + browser_result=res.get("browser_result") + ) self.task_queue.put(agent_pb2.ClientTaskMessage(task_response=tr)) if __name__ == '__main__': diff --git a/poc-grpc-agent/protos/agent.proto b/poc-grpc-agent/protos/agent.proto index a8629ae..3855527 100644 --- a/poc-grpc-agent/protos/agent.proto +++ b/poc-grpc-agent/protos/agent.proto @@ -66,12 +66,31 @@ message TaskRequest { string task_id = 1; string task_type = 2; - string payload_json = 3; + oneof payload { + string payload_json = 3; // For legacy shell/fallback + BrowserAction browser_action = 7; // NEW: Structured Browser Skill + } int32 timeout_ms = 4; string trace_id = 5; string signature = 6; } +message BrowserAction { + enum ActionType { + NAVIGATE = 0; + CLICK = 1; + TYPE = 2; + SCREENSHOT = 3; + GET_DOM = 4; + CLOSE = 7; + } + ActionType action = 1; + string url = 2; + string selector = 3; + string text = 4; + string session_id = 5; // For persistent "Antigravity Bridge" continuity +} + message TaskResponse { string task_id = 1; enum Status { @@ -85,6 +104,18 @@ string stderr = 4; string trace_id = 5; map artifacts = 6; + + // NEW: Structured Skill Results + oneof result { + BrowserResponse browser_result = 7; + } +} + +message BrowserResponse { + string url = 1; + string title = 2; + bytes snapshot = 3; // Small snapshots; large ones use 'artifacts' chunking + string dom_content = 4; } message WorkPoolUpdate { diff --git a/poc-grpc-agent/requirements.txt b/poc-grpc-agent/requirements.txt index 2366f18..6e33b6d 100644 --- a/poc-grpc-agent/requirements.txt +++ b/poc-grpc-agent/requirements.txt @@ -1,2 +1,4 @@ grpcio==1.62.1 grpcio-tools==1.62.1 +PyJWT==2.8.0 +playwright==1.42.0 diff --git a/poc-grpc-agent/server.py b/poc-grpc-agent/server.py index c3ac742..2280a72 100644 --- a/poc-grpc-agent/server.py +++ b/poc-grpc-agent/server.py @@ -111,6 +111,28 @@ self.journal.pop(tid) return {"error": "Timeout"} + def dispatch_browser(self, node_id, action, timeout=60): + node = self.registry.get_node(node_id) + if not node: return {"error": "Offline"} + + tid = f"br-{int(time.time()*1000)}" + event = self.journal.register(tid, node_id) + + # Basic signature for POC: Sign the action enum name + URL + sign_base = f"{action.action}:{action.url}:{action.session_id}" + sig = hmac.new(SECRET_KEY.encode(), sign_base.encode(), hashlib.sha256).hexdigest() + + req = agent_pb2.ServerTaskMessage(task_request=agent_pb2.TaskRequest( + task_id=tid, browser_action=action, signature=sig)) + + node["queue"].put(req) + if event.wait(timeout): + res = self.journal.get_result(tid) + self.journal.pop(tid) + return res + self.journal.pop(tid) + return {"error": "Timeout"} + class AgentOrchestrator(agent_pb2_grpc.AgentOrchestratorServicer): def __init__(self): self.registry = MemoryNodeRegistry() @@ -140,7 +162,11 @@ self.registry.get_node(node_id)["queue"].put(agent_pb2.ServerTaskMessage( task_request=agent_pb2.TaskRequest(task_id=msg.task_claim.task_id, payload_json=payload, signature=sig))) elif kind == 'task_response': - self.journal.fulfill(msg.task_response.task_id, {"stdout": msg.task_response.stdout, "status": msg.task_response.status}) + res_obj = {"stdout": msg.task_response.stdout, "status": msg.task_response.status} + if msg.task_response.HasField("browser_result"): + br = msg.task_response.browser_result + res_obj["browser"] = {"url": br.url, "title": br.title, "has_snapshot": len(br.snapshot) > 0} + self.journal.fulfill(msg.task_response.task_id, res_obj) threading.Thread(target=_read, daemon=True).start() @@ -180,6 +206,24 @@ print("\n[🧠] AI Simulation Start...") print(f" Whoami: {orch.assistant.dispatch_single('agent-node-007', 'whoami')}") + # NEW: Browser Phase + print("\n[🧠] AI Phase 4: Navigating Browser (Antigravity Bridge)...") + nav_action = agent_pb2.BrowserAction( + action=agent_pb2.BrowserAction.NAVIGATE, + url="https://example.com", + session_id="antigravity-session-1" + ) + res_nav = orch.assistant.dispatch_browser("agent-node-007", nav_action) + print(f" Nav Result: {res_nav}") + + print("\n[🧠] AI Phase 5: Multi-Action Persistence (Screenshot)...") + snap_action = agent_pb2.BrowserAction( + action=agent_pb2.BrowserAction.SCREENSHOT, + session_id="antigravity-session-1" + ) + res_snap = orch.assistant.dispatch_browser("agent-node-007", snap_action) + print(f" Snap Result: {res_snap.get('browser', {}).get('title')} | Snapshot captured: {res_snap.get('browser', {}).get('has_snapshot')}") + server.wait_for_termination() if __name__ == '__main__':