Newer
Older
cortex-hub / ai-hub / integration_tests / test_browser_llm.py
import json
import os
import subprocess
import time

import httpx
import pytest

# Root URL of the Hub REST API under test; override via SYNC_TEST_BASE_URL.
BASE_URL = os.environ.get("SYNC_TEST_BASE_URL", "http://127.0.0.1:8002/api/v1")

def _headers():
    """Return the request headers carrying the test user's identity.

    The user id is read from the SYNC_TEST_USER_ID environment variable on
    every call; an empty string is sent when the variable is unset.
    """
    user_id = os.environ.get("SYNC_TEST_USER_ID", "")
    return {"X-User-ID": user_id}

# Container that hosts the Hub service and the directory holding synced mirrors.
_HUB_CONTAINER = "ai_hub_service"
_MIRRORS_ROOT = "/app/data/mirrors/"
# Tool the prompt instructs the LLM to invoke.
_BROWSER_TOOL = "browser_automation_agent"
# Number of one-second polls granted to the file-sync mesh.
_SYNC_POLL_ATTEMPTS = 15


def _read_chat_stream(response):
    """Consume the SSE chat stream and return ``(content, tool_invoked)``.

    ``content`` is the concatenation of every ``content`` event and
    ``tool_invoked`` is True once a ``tool_start`` event for the browser tool
    has been seen. Reading stops at ``[DONE]`` or a ``done`` event. An
    ``error`` event fails the running test immediately.
    """
    prefix = "data: "
    chunks = []
    tool_invoked = False

    for line in response.iter_lines():
        if not line.startswith(prefix):
            continue
        data_str = line[len(prefix):]
        if data_str == "[DONE]":
            break

        # Only the decode can legitimately fail here; keep the try minimal so
        # unrelated errors in event handling are not masked.
        try:
            event = json.loads(data_str)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict):
            # Valid JSON but not an event object (e.g. a bare string): skip.
            continue

        event_type = event.get("type")
        if event_type == "content":
            chunks.append(event.get("content") or "")
        elif event_type == "tool_start":
            if event.get("name") == _BROWSER_TOOL:
                tool_invoked = True
        elif event_type == "error":
            pytest.fail(f"LLM backend emitted an error: {event.get('content')}")
        elif event_type == "done":
            break

    return "".join(chunks), tool_invoked


def _hub_find(*find_args):
    """Run ``find`` inside the Hub container; return ``(paths, process)``.

    Arguments are passed as argv (no shell), so paths containing spaces or
    shell metacharacters and glob patterns such as ``*.png`` reach ``find``
    unmodified.
    """
    proc = subprocess.run(
        ["docker", "exec", _HUB_CONTAINER, "find", *find_args],
        capture_output=True,
        text=True,
    )
    paths = [entry for entry in proc.stdout.splitlines() if entry.strip()]
    return paths, proc


def _wait_for_session_browser_dir(session_id):
    """Poll the mirrors until a ``.browser_data`` dir for *session_id* shows up.

    Returns ``(matched_dir_or_None, last_process)``. Polling continues until a
    directory matching this session is found, so stale directories left by
    earlier sessions do not end the wait prematurely.
    """
    needle = str(session_id)
    proc = None
    for attempt in range(_SYNC_POLL_ATTEMPTS):
        found_dirs, proc = _hub_find(_MIRRORS_ROOT, "-name", ".browser_data", "-type", "d")
        # Match session id or sync_workspace_id (e.g. 'session-21-' or '21').
        # NOTE(review): plain substring match; id 2 would also match
        # 'session-21'. Tighten once the mirror naming scheme is confirmed.
        matched = next((d for d in found_dirs if needle in d), None)
        if matched:
            return matched, proc
        if attempt < _SYNC_POLL_ATTEMPTS - 1:
            time.sleep(1)
    return None, proc


@pytest.mark.skipif(os.getenv("SKIP_DOCKER_NODES", "false").lower() == "true", reason="Browser skill requires a fully-loaded Docker container environment to access Chromium.")
def test_browser_skill_weather():
    """
    Test explicitly asking the LLM context to leverage its browser skill
    to fetch real-time data indicating that tool resolution and execution works.

    Steps:
      1. Create a Gemini-bound session through the Hub API.
      2. Stream a chat request that instructs the LLM to screenshot
         https://example.com and report its heading; require that the browser
         tool was started and that the answer mentions "Example Domain".
      3. Verify, via ``docker exec`` into the Hub container, that a
         ``.browser_data`` directory for the session was synced into the
         mirrors and contains a screenshot and extraction metadata.
    """
    user_id = os.environ.get("SYNC_TEST_USER_ID", "")
    assert user_id, "User ID not found in environment from conftest."

    with httpx.Client(timeout=45.0) as client:
        # Step 1: Create a new session bound to Gemini
        session_payload = {
            "user_id": user_id,
            "provider_name": "gemini",
            "feature_name": "agent_harness",
        }
        r_sess = client.post(f"{BASE_URL}/sessions/", headers=_headers(), json=session_payload)
        assert r_sess.status_code == 200, f"Failed to create session: {r_sess.text}"

        session_id = r_sess.json()["id"]

        # Step 2: Ask a question that requires the browser to take a snapshot/screenshot
        # We explicitly ask it to navigate and snapshot to guarantee image output.
        chat_payload = {
            "prompt": "Use your browser_automation_agent tool to navigate to https://example.com, take a screenshot of the page, and tell me the heading you see on the page.",
            "provider_name": "gemini",
        }

        # We expect a tool call block to occur indicating success
        with client.stream("POST", f"{BASE_URL}/sessions/{session_id}/chat", headers=_headers(), json=chat_payload) as r_chat:
            assert r_chat.status_code == 200, "Chat request failed to initialize."
            full_response, tool_invoked = _read_chat_stream(r_chat)

    full_response = full_response.strip().lower()
    assert len(full_response) > 0, "LLM returned an entirely silent response."
    assert tool_invoked, "The LLM didn't attempt to invoke the browser tool as instructed."

    # The prompt asked for example.com heading ("Example Domain").
    assert "example domain" in full_response, f"LLM did not identify the correct heading. Response: {full_response}"

    # Step 3: Verify the browser agent physically saved the screenshot and metadata to the file sync system.
    # Give the mesh file-sync engine up to 15 seconds to sync the .browser_data from the Node back into the Hub's mirrors.
    print("\n[test] Waiting up to 15s for file-sync mesh propagation of browser artifacts...")
    matched_dir, res = _wait_for_session_browser_dir(session_id)

    assert res and res.returncode == 0, f"Failed to search for .browser_data inside Docker: {res.stderr}"
    assert matched_dir, f"Could not find .browser_data directory for session {session_id} in mirrors. Found: {res.stdout}"

    # Now verify there's a screenshot inside it
    png_files, res_png = _hub_find(matched_dir, "-name", "*.png", "-type", "f")
    assert len(png_files) > 0, (
        "No screenshot .png files were saved within the browser agent's output folder."
        f" (find stderr: {res_png.stderr.strip()})"
    )

    # Verify metadata / A11y summary saved
    meta_files, res_meta = _hub_find(
        f"{matched_dir}/.metadata", "-name", "*.txt", "-o", "-name", "*.json"
    )
    assert len(meta_files) > 0, (
        "No extraction metadata or A11y text files found in the .browser_data/.metadata folder."
        f" (find stderr: {res_meta.stderr.strip()})"
    )