diff --git a/ai-hub/app/api/routes/tts.py b/ai-hub/app/api/routes/tts.py index 5352008..9e3d620 100644 --- a/ai-hub/app/api/routes/tts.py +++ b/ai-hub/app/api/routes/tts.py @@ -134,7 +134,7 @@ # Then stream the remaining chunks using parallel fetching but sequential yielding import asyncio - semaphore = asyncio.Semaphore(3) # Limit concurrent external requests + semaphore = asyncio.Semaphore(1) # Strict lock for Beta TTS stability async def fetch_chunk(text_chunk, idx): retries = 3 @@ -143,7 +143,6 @@ try: async with semaphore: pcm_data = await provider.generate_speech(text_chunk) - logger.debug(f"TTS Chunk {idx} generated: {len(pcm_data)} bytes.") return pcm_data except Exception as e: error_str = str(e) @@ -168,7 +167,7 @@ yield _create_wav_file(pcm) else: yield pcm - logger.debug(f"TTS Chunk {i+1} yielded.") + logger.info(f"TTS Chunk {i+1} yielded successfully.") except Exception as e: logger.error(f"Runtime error in TTS stream: {e}") @@ -176,7 +175,7 @@ finally: logger.info(f"TTS Stream finished for session {user_id}") - media_type = "audio/wav" if as_wav else "application/octet-stream" + media_type = "audio/wav" # Always WAV for best client support (media type no longer varies with as_wav) return StreamingResponse( full_stream(), media_type=media_type, diff --git a/ai-hub/app/core/providers/tts/gemini.py b/ai-hub/app/core/providers/tts/gemini.py index c5353d9..b3a207c 100644 @@ -78,7 +78,7 @@ before_sleep=lambda retry_state: logger.warning(f"Retrying Gemini TTS request (attempt {retry_state.attempt_number})...") ) async def generate_speech(self, text: str) -> bytes: - logger.debug(f"TTS generate_speech: '{text[:60]}...'") + logger.info(f"TTS request [model={self.model_name}, vertex={self.is_vertex}]: '{text[:60]}...'") headers = {"Content-Type": "application/json"}