import os
import httpx
import pytest
# Root URL of the API under test. Set the SYNC_TEST_BASE_URL environment
# variable to point the tests at a different host; otherwise the local
# default below is used.
BASE_URL = os.environ.get("SYNC_TEST_BASE_URL", "http://127.0.0.1:8002/api/v1")
def _headers():
    """Build the HTTP headers sent with each request in this module.

    The ``X-User-ID`` value is read from the ``SYNC_TEST_USER_ID``
    environment variable at call time; when the variable is unset the
    header is still present, with an empty string as its value.
    """
    user_id = os.environ.get("SYNC_TEST_USER_ID", "")
    return {"X-User-ID": user_id}
def test_tts_voices():
    """Check that the voices endpoint answers 200 with a JSON list."""
    voices_url = f"{BASE_URL}/speech/voices"
    with httpx.Client(timeout=10.0) as http:
        response = http.get(voices_url, headers=_headers())
        assert response.status_code == 200, f"Failed to get voices: {response.text}"

        payload = response.json()
        assert isinstance(payload, list), "Voices should be a list"
def test_tts_to_stt_lifecycle():
    """Round-trip a phrase through the speech endpoints.

    The phrase is first synthesized by ``POST {BASE_URL}/speech``; the bytes
    that come back are then uploaded to ``POST {BASE_URL}/stt/transcribe``
    and the returned transcript is checked for key words from the phrase.
    """
    # Fail fast if the variable that _headers() reads for X-User-ID is unset
    # or empty (the message suggests conftest is expected to provide it).
    assert os.environ.get(
        "SYNC_TEST_USER_ID", ""
    ), "User ID not found in environment from conftest."

    spoken_text = "Hello from integration test audio pipeline."
    tts_url = f"{BASE_URL}/speech"
    stt_url = f"{BASE_URL}/stt/transcribe"

    with httpx.Client(timeout=30.0) as http:
        # --- Stage 1: text -> audio ---
        tts_response = http.post(
            tts_url,
            params={"stream": False},
            headers=_headers(),
            json={"text": spoken_text},
        )
        assert tts_response.status_code == 200, f"TTS failed: {tts_response.text}"

        # Reject an empty or stub body: 1000 bytes or fewer does not pass.
        audio_bytes = tts_response.content
        assert len(audio_bytes) > 1000, "TTS audio content seems too small"

        # --- Stage 2: audio -> text ---
        # Multipart upload tuple: (filename, content, content type).
        upload = ("test_audio_pipeline.wav", audio_bytes, "audio/wav")
        stt_response = http.post(
            stt_url,
            headers=_headers(),
            files={"audio_file": upload},
        )
        assert stt_response.status_code == 200, f"STT failed: {stt_response.text}"

        # A missing "transcript" key is treated as an empty transcript.
        transcript = stt_response.json().get("transcript", "").lower()

        # Only individual key words are checked, case-insensitively, because
        # an STT model may rephrase slightly or drop punctuation.
        assert "hello" in transcript, f"Expected 'hello' in transcript: {transcript}"
        assert "integration" in transcript, f"Expected 'integration' in transcript: {transcript}"