import pytest
import httpx
import wave
import io

@pytest.mark.asyncio
async def test_root_endpoint(http_client):
    """
    Checks that GET "/" answers with HTTP 200 and the expected status payload.

    Args:
        http_client: Async HTTP client fixture used to reach the service.
    """
    print("\n--- Running test_root_endpoint ---")

    expected_body = {"status": "AI Model Hub is running!"}
    root_response = await http_client.get("/")

    # Status first, so a failing service is reported before the body is parsed.
    assert root_response.status_code == 200
    assert root_response.json() == expected_body

    print("✅ Root endpoint test passed.")

@pytest.mark.asyncio
async def test_create_speech_stream(http_client):
    """
    Tests the /speech endpoint for a successful, non-empty audio stream response.

    Posts a short text payload, then verifies the status code, the
    ``audio/wav`` content type, and that at least one byte of body is streamed.

    Args:
        http_client: Async HTTP client fixture. Assumed to be an
            ``httpx.AsyncClient`` (the fixture is defined outside this file).
    """
    print("\n--- Running test_create_speech_stream ---")
    url = "/speech"
    payload = {"text": "Hello, world!"}

    # `.stream(...)` defers reading the response body, so the chunks can be
    # consumed incrementally below instead of being loaded all at once.
    async with http_client.stream("POST", url, json=payload) as response:
        if response.status_code != 200:
            # On a streamed response the body is not loaded yet, and accessing
            # `.text` would raise httpx.ResponseNotRead, hiding the server's
            # actual error. Read the body first so the message can include it.
            await response.aread()
        assert response.status_code == 200, f"Speech stream request failed. Response: {response.text}"
        assert response.headers.get("content-type") == "audio/wav"

        # Check that the response body is not empty by iterating over chunks.
        content_length = 0
        async for chunk in response.aiter_bytes():
            content_length += len(chunk)

        assert content_length > 0
    print("✅ TTS stream test passed.")

@pytest.mark.asyncio
async def test_stt_transcribe_endpoint(http_client):
    """
    Uploads a WAV file to /stt/transcribe and verifies the returned transcript.

    The response must be HTTP 200 with a JSON body holding a string under the
    "transcript" key. Expected and actual text are compared after lowercasing,
    removing every character that is not a-z, 0-9 or whitespace, and collapsing
    whitespace runs, so punctuation and spacing differences do not fail the test.

    Args:
        http_client: Async HTTP client fixture used to reach the service.
    """
    print("\n--- Running test_stt_transcribe_endpoint ---")

    import re

    disallowed_chars = re.compile(r'[^a-z0-9\s]')

    def canonical_form(raw):
        """Lowercase, drop non-alphanumeric/non-space characters, collapse whitespace."""
        cleaned = disallowed_chars.sub('', raw.lower())
        return ' '.join(cleaned.split())

    endpoint = "/stt/transcribe"
    wav_path = "integration_tests/test_data/test-audio.wav"
    expected_transcript = "This audio is for integration testing of Cortex Hub, which is a wonderful project."

    # The upload is awaited inside the `with` so the handle stays open for
    # the whole duration of the request.
    with open(wav_path, "rb") as wav_handle:
        upload = {'audio_file': ('test-audio.wav', wav_handle, 'audio/wav')}
        response = await http_client.post(endpoint, files=upload)

    # --- Response shape ---
    assert response.status_code == 200, f"STT request failed with status code {response.status_code}. Response: {response.text}"
    body = response.json()
    assert "transcript" in body, "Response JSON is missing the 'transcript' key."
    transcript = body["transcript"]
    assert isinstance(transcript, str), "Transcript value is not a string."

    # --- Content comparison on the normalized forms ---
    normalized_expected = canonical_form(expected_transcript)
    normalized_actual = canonical_form(transcript)

    assert normalized_actual == normalized_expected, (
        f"Transcript mismatch after normalization.\n"
        f"Expected (Normalized): '{normalized_expected}'\n"
        f"Got (Normalized): '{normalized_actual}'\n"
        f"Original Expected: '{expected_transcript}'\n"
        f"Original Got: '{transcript}'"
    )