diff --git a/ai-hub/integration_tests/test_browser_llm.py b/ai-hub/integration_tests/test_browser_llm.py
new file mode 100644
index 0000000..5def577
--- /dev/null
+++ b/ai-hub/integration_tests/test_browser_llm.py
@@ -0,0 +1,72 @@
+import os
+import httpx
+import pytest
+import json
+
+BASE_URL = os.getenv("SYNC_TEST_BASE_URL", "http://127.0.0.1:8002/api/v1")
+
+def _headers():
+    return {
+        "X-User-ID": os.environ.get("SYNC_TEST_USER_ID", "")
+    }
+
+def test_browser_skill_weather():
+    """
+    Test explicitly asking the LLM context to leverage its browser skill
+    to fetch real-time data indicating that tool resolution and execution works.
+    """
+    user_id = os.environ.get("SYNC_TEST_USER_ID", "")
+    assert user_id, "User ID not found in environment from conftest."
+
+    with httpx.Client(timeout=45.0) as client:
+        # Step 1: Create a new session bound to Gemini
+        session_payload = {
+            "user_id": user_id,
+            "provider_name": "gemini",
+            "feature_name": "agent_harness"
+        }
+        r_sess = client.post(f"{BASE_URL}/sessions/", headers=_headers(), json=session_payload)
+        assert r_sess.status_code == 200, f"Failed to create session: {r_sess.text}"
+
+        session_id = r_sess.json()["id"]
+
+        # Step 2: Ask a question that requires real-time knowledge (weather)
+        # We explicitly ask it to use its search/browser tools to ensure it invokes them.
+        chat_payload = {
+            "prompt": "Use your browser or search tools to find the current weather in Tokyo, Japan. Briefly summarize it in 1 sentence.",
+            "provider_name": "gemini"
+        }
+
+        full_response = ""
+        tool_invoked = False
+
+        # We expect a tool call block to occur indicating success
+        with client.stream("POST", f"{BASE_URL}/sessions/{session_id}/chat", headers=_headers(), json=chat_payload) as r_chat:
+            assert r_chat.status_code == 200, "Chat request failed to initialize."
+
+            for line in r_chat.iter_lines():
+                if line.startswith("data: "):
+                    data_str = line[len("data: "):]
+                    if data_str == "[DONE]":
+                        break
+
+                    try:
+                        event = json.loads(data_str)
+                        event_type = event.get("type")
+
+                        if event_type == "content":
+                            full_response += event.get("content", "")
+                        elif event_type == "status":
+                            if "Dispatching" in event.get("content", ""):
+                                tool_invoked = True
+                        elif event_type == "error":
+                            pytest.fail(f"LLM backend emitted an error: {event.get('content')}")
+                        elif event_type == "done":
+                            break
+                    except json.JSONDecodeError:
+                        pass
+
+    full_response = full_response.strip()
+    assert len(full_response) > 0, "LLM returned an entirely silent response."
+    assert tool_invoked, "The LLM didn't attempt to invoke any web/browser tools as instructed."
+    # If it invoked the tool and formulated a response, the end-to-end framework succeeded!
diff --git a/ai-hub/integration_tests/test_llm_chat.py b/ai-hub/integration_tests/test_llm_chat.py
index 7d583d7..1e222cd 100644
--- a/ai-hub/integration_tests/test_llm_chat.py
+++ b/ai-hub/integration_tests/test_llm_chat.py
@@ -35,7 +35,7 @@
     # Step 2: Send a chat prompt instructing the LLM to echo back a phrase.
     # This asserts litellm, group RBAC, the context window, and SSE streaming are intact.
     chat_payload = {
-        "prompt": "I am testing your response capabilities. Please respond with exactly the text 'CORTEX HUB LIVE' and nothing else.",
+        "prompt": "What is the capital of France? Please respond with just the city name.",
         "provider_name": "gemini",
         "load_faiss_retriever": False
     }
@@ -67,4 +67,4 @@
 
     full_response = full_response.strip()
     assert len(full_response) > 0, "LLM returned an entirely silent response."
-    assert "CORTEX HUB LIVE" in full_response.upper(), f"LLM missed the instruction formatting. True Response: {full_response}"
+    assert "Paris" in full_response, f"LLM did not identify Paris. True Response: {full_response}"