import json
import os
import subprocess
import time

import httpx
import pytest
BASE_URL = os.getenv("SYNC_TEST_BASE_URL", "http://127.0.0.1:8002/api/v1")
def _headers():
return {
"X-User-ID": os.environ.get("SYNC_TEST_USER_ID", "")
}
@pytest.mark.skipif(os.getenv("SKIP_DOCKER_NODES", "false").lower() == "true", reason="Browser skill requires a fully-loaded Docker container environment to access Chromium.")
def test_browser_skill_weather():
"""
Test explicitly asking the LLM context to leverage its browser skill
to fetch real-time data indicating that tool resolution and execution works.
"""
user_id = os.environ.get("SYNC_TEST_USER_ID", "")
assert user_id, "User ID not found in environment from conftest."
with httpx.Client(timeout=45.0) as client:
# Step 1: Create a new session bound to Gemini
session_payload = {
"user_id": user_id,
"provider_name": "gemini",
"feature_name": "agent_harness"
}
r_sess = client.post(f"{BASE_URL}/sessions/", headers=_headers(), json=session_payload)
assert r_sess.status_code == 200, f"Failed to create session: {r_sess.text}"
session_id = r_sess.json()["id"]
# Step 2: Ask a question that requires the browser to take a snapshot/screenshot
# We explicitly ask it to navigate and snapshot to guarantee image output.
chat_payload = {
"prompt": "Use your browser_automation_agent tool to navigate to https://example.com, take a screenshot of the page, and tell me the heading you see on the page.",
"provider_name": "gemini"
}
full_response = ""
tool_invoked = False
# We expect a tool call block to occur indicating success
with client.stream("POST", f"{BASE_URL}/sessions/{session_id}/chat", headers=_headers(), json=chat_payload) as r_chat:
assert r_chat.status_code == 200, "Chat request failed to initialize."
for line in r_chat.iter_lines():
if line.startswith("data: "):
data_str = line[len("data: "):]
if data_str == "[DONE]":
break
try:
event = json.loads(data_str)
event_type = event.get("type")
if event_type == "content":
full_response += event.get("content", "")
elif event_type == "tool_start":
if event.get("name") == "browser_automation_agent":
tool_invoked = True
elif event_type == "error":
pytest.fail(f"LLM backend emitted an error: {event.get('content')}")
elif event_type == "done":
break
except json.JSONDecodeError:
pass
full_response = full_response.strip().lower()
assert len(full_response) > 0, "LLM returned an entirely silent response."
assert tool_invoked, "The LLM didn't attempt to invoke the browser tool as instructed."
# The prompt asked for example.com heading ("Example Domain").
assert "example domain" in full_response, f"LLM did not identify the correct heading. Response: {full_response}"
# Step 3: Verify the browser agent physically saved the screenshot and metadata to the file sync system.
# Since the session_id is either used directly or via sync_workspace_id wrapper, we search the mirrors directory.
import subprocess
import time
# Give the mesh file-sync engine up to 15 seconds to sync the .browser_data from the Node back into the Hub's mirrors.
print("\\n[test] Waiting up to 15s for file-sync mesh propagation of browser artifacts...")
res = None
for _ in range(15):
cmd = [
"docker", "exec", "ai_hub_service", "bash", "-c",
"find /app/data/mirrors/ -name '.browser_data' -type d"
]
res = subprocess.run(cmd, capture_output=True, text=True)
if res.stdout.strip():
break
time.sleep(1)
assert res and res.returncode == 0, f"Failed to search for .browser_data inside Docker: {res.stderr}"
# We expect at least one .browser_data directory corresponding to our session
found_dirs = res.stdout.strip().split('\n')
# match session id or sync_workspace_id (e.g. 'session-21-' or '21')
matched_dir = next((d for d in found_dirs if str(session_id) in d), None)
assert matched_dir, f"Could not find .browser_data directory for session {session_id} in mirrors. Found: {res.stdout}"
# Now verify there's a screenshot inside it
verify_cmd = [
"docker", "exec", "ai_hub_service", "bash", "-c",
f"find {matched_dir} -name '*.png' -type f | wc -l"
]
res_png = subprocess.run(verify_cmd, capture_output=True, text=True)
png_count = int(res_png.stdout.strip() or "0")
assert png_count > 0, "No screenshot .png files were saved within the browser agent's output folder."
# Verify metadata / A11y summary saved
verify_meta_cmd = [
"docker", "exec", "ai_hub_service", "bash", "-c",
f"find {matched_dir}/.metadata -name '*.txt' -o -name '*.json' | wc -l"
]
res_meta = subprocess.run(verify_meta_cmd, capture_output=True, text=True)
meta_count = int(res_meta.stdout.strip() or "0")
assert meta_count > 0, "No extraction metadata or A11y text files found in the .browser_data/.metadata folder."