diff --git a/ai-hub/app/api/routes/tts.py b/ai-hub/app/api/routes/tts.py index eda6404..a53069f 100644 --- a/ai-hub/app/api/routes/tts.py +++ b/ai-hub/app/api/routes/tts.py @@ -76,60 +76,74 @@ # Test first chunk synchronously to validate the provider works first_pcm = await provider.generate_speech(chunks[0]) + logger.info(f"TTS Stream started for session {user_id}. Initial chunk: {len(first_pcm)} bytes.") async def full_stream(): - # Yield the already-generated first chunk - if as_wav: - from app.core.services.tts import _create_wav_file - yield _create_wav_file(first_pcm) - else: - yield first_pcm - # Then stream the remaining chunks using parallel fetching but sequential yielding - import asyncio - semaphore = asyncio.Semaphore(3) # Limit concurrent external requests + try: + # Yield the already-generated first chunk + if as_wav: + from app.core.services.tts import _create_wav_file + yield _create_wav_file(first_pcm) + else: + yield first_pcm + + # Then stream the remaining chunks using parallel fetching but sequential yielding + import asyncio + semaphore = asyncio.Semaphore(3) # Limit concurrent external requests - async def fetch_chunk(text_chunk): - retries = 3 - delay = 1.0 - for attempt in range(retries): - try: - async with semaphore: - return await provider.generate_speech(text_chunk) - except Exception as e: - error_str = str(e) - if "No audio in response" in error_str or "finishReason" in error_str: - import logging - logging.getLogger(__name__).error(f"TTS chunk blocked by provider formatting/safety: {e}") - return None + async def fetch_chunk(text_chunk, idx): + retries = 3 + delay = 1.0 + for attempt in range(retries): + try: + async with semaphore: + pcm_data = await provider.generate_speech(text_chunk) + logger.debug(f"TTS Chunk {idx} generated: {len(pcm_data)} bytes.") + return pcm_data + except Exception as e: + error_str = str(e) + if "No audio in response" in error_str or "finishReason" in error_str: + logger.error(f"TTS Chunk {idx} blocked: {e}") + return None + + if attempt == retries - 1: + logger.error(f"TTS Chunk {idx} failed after {retries} attempts: {e}") + return None + await asyncio.sleep(delay) + delay *= 2 + + # Start all tasks concurrently + tasks = [asyncio.create_task(fetch_chunk(chunk, i+1)) for i, chunk in enumerate(chunks[1:])] + + for i, task in enumerate(tasks): + pcm = await task + if pcm: + if as_wav: + from app.core.services.tts import _create_wav_file + yield _create_wav_file(pcm) + else: + yield pcm + logger.debug(f"TTS Chunk {i+1} yielded.") - if attempt == retries - 1: - import logging - logging.getLogger(__name__).error(f"TTS chunk failed after {retries} attempts: {e}") - return None - await asyncio.sleep(delay) - delay *= 2 + except Exception as e: + logger.error(f"Runtime error in TTS stream: {e}") + raise + finally: + logger.info(f"TTS Stream finished for session {user_id}") - # Start all tasks concurrently - tasks = [asyncio.create_task(fetch_chunk(chunk)) for chunk in chunks[1:]] - - for task in tasks: - pcm = await task - if pcm: - if as_wav: - from app.core.services.tts import _create_wav_file - yield _create_wav_file(pcm) - else: - yield pcm - - media_type = "audio/wav" if as_wav else "audio/pcm" + media_type = "audio/wav" if as_wav else "application/octet-stream" return StreamingResponse( full_stream(), media_type=media_type, - headers={"X-TTS-Chunk-Count": str(len(chunks))} + headers={ + "X-TTS-Chunk-Count": str(len(chunks)), + "Cache-Control": "no-cache", + "Connection": "keep-alive" + } ) else: - # The non-streaming function only returns WAV, so this part remains the same + # The non-streaming function only returns WAV audio_bytes = await services.tts_service.create_speech_non_stream( text=request.text, provider_override=provider_override @@ -139,6 +153,7 @@ except HTTPException: raise except Exception as e: + logger.error(f"TTS route error: {e}") raise HTTPException( status_code=500, detail=f"Failed to generate speech: {e}" ) diff --git a/ui/client-app/src/hooks/useVoiceChat.js b/ui/client-app/src/hooks/useVoiceChat.js index b7e27ec..c4c35ba 100644 --- a/ui/client-app/src/hooks/useVoiceChat.js +++ b/ui/client-app/src/hooks/useVoiceChat.js @@ -53,6 +53,7 @@ const audioContextRef = useRef(null); const playbackTimeRef = useRef(0); const isRecordingRef = useRef(false); + const isBusyRef = useRef(false); const playingSourcesRef = useRef([]); const vadStreamRef = useRef(null); const scriptProcessorRef = useRef(null); @@ -62,13 +63,19 @@ const [isStreamingPlaying, setIsStreamingPlaying] = useState(false); + // Helper to keep ref and state in sync + const setBusy = (val) => { + setIsBusy(val); + isBusyRef.current = val; + }; + /** * Stops any currently playing streaming audio. */ const stopStreamingPlayback = useCallback(() => { stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef); setIsStreamingPlaying(false); - setIsBusy(false); + setBusy(false); }, []); const fetchTokenUsage = useCallback(async () => { @@ -84,7 +91,7 @@ // --- Initial Session Creation Effect --- useEffect(() => { const startSession = async () => { - setIsBusy(true); + setBusy(true); setStatus("Loading chat session..."); try { let configDataToUse = null; @@ -227,13 +234,13 @@ * @param {number} messageId - The ID of the message to associated the audio with. */ const playStreamingAudio = async (text, messageId = null) => { - setIsBusy(true); + setBusy(true); setIsStreamingPlaying(true); setStatus("Streaming audio..."); // Stop any existing playback stopStreamingPlayback(); - setIsBusy(true); // stopStreamingPlayback sets it to false, we want it true during this process + setBusy(true); // stopStreamingPlayback sets it to false, we want it true during this process // Track chunks to store in history const accumulatedChunks = []; @@ -355,7 +362,7 @@ setErrorMessage(`Failed to stream speech: ${err.message}`); setShowErrorModal(true); } finally { - setIsBusy(false); + setBusy(false); lastRequestTimeRef.current = Date.now(); if (isAutoMode && isAutoListening) { setStatus("Listening for voice..."); @@ -443,6 +450,7 @@ }; const processConversation = async (audioBlob) => { + setBusy(true); console.log("Processing conversation..."); try { const audioDuration = audioBlob.size / (48000 * 2 * 1) * 1000; @@ -475,7 +483,7 @@ setErrorMessage(`An error occurred: ${error.message}`); setShowErrorModal(true); } finally { - setIsBusy(false); + setBusy(false); lastRequestTimeRef.current = Date.now(); // This is the main correction: only stop streams if not in auto-listening mode if (!isAutoMode) { @@ -530,7 +538,7 @@ const stopManualRecording = () => { if (mediaRecorderRef.current?.state === "recording") { - setIsBusy(true); + setBusy(true); setIsRecording(false); mediaRecorderRef.current.stop(); } @@ -564,7 +572,7 @@ const timeSinceLastRequest = Date.now() - lastRequestTimeRef.current; const isCooldownPassed = timeSinceLastRequest > AUTO_MODE_COOLDOWN_MS; - if (isVoiceDetected && !isBusy) { + if (isVoiceDetected && !isBusyRef.current) { if (silenceTimeoutRef.current) { clearTimeout(silenceTimeoutRef.current); silenceTimeoutRef.current = null;