// src/hooks/useVoiceChat.js // This file is a custom React hook that contains all the stateful logic // and side effects for the voice chat application. import { useState, useRef, useEffect } from "react"; import { createSession, transcribeAudio, chatWithAI, streamSpeech, } from "../services/apiService"; import { stopAllPlayingAudio, stopAllMediaStreams, resampleBuffer, } from "../services/audioUtils"; // Constants for Voice Activity Detection and timing const VAD_THRESHOLD = 0.01; const VAD_SILENCE_DURATION = 2500; const MINIMUM_AUDIO_DURATION_MS = 500; const AUTO_MODE_COOLDOWN_MS = 3000; const useVoiceChat = ({ chatContainerRef }) => { const [chatHistory, setChatHistory] = useState([ { text: "Hello! I'm an AI assistant. How can I help you today?", isUser: false, }, ]); const [status, setStatus] = useState("Click the microphone to start recording."); const [isBusy, setIsBusy] = useState(false); const [isRecording, setIsRecording] = useState(false); const [showErrorModal, setShowErrorModal] = useState(false); const [errorMessage, setErrorMessage] = useState(""); const [sessionId, setSessionId] = useState(null); const [isAutoMode, setIsAutoMode] = useState(false); const [isAutoListening, setIsAutoListening] = useState(false); // All refs must be declared here, inside the custom hook. const mediaRecorderRef = useRef(null); const audioChunksRef = useRef([]); const audioContextRef = useRef(null); const playbackTimeRef = useRef(0); const isRecordingRef = useRef(false); const playingSourcesRef = useRef([]); const vadStreamRef = useRef(null); const scriptProcessorRef = useRef(null); const silenceTimeoutRef = useRef(null); const lastRequestTimeRef = useRef(0); const streamRef = useRef(null); // --- Initial Session Creation Effect --- useEffect(() => { const startSession = async () => { setIsBusy(true); setStatus("Starting new chat session..."); try { const session = await createSession(); setSessionId(session.id); console.log(`Session created with ID: ${session.id}`); setStatus("Click the microphone to start recording."); } catch (err) { console.error("Error creating session:", err); setStatus(`Error: Could not start session. ${err.message}`); setErrorMessage(`Failed to create a chat session: ${err.message}`); setShowErrorModal(true); } finally { setIsBusy(false); } }; startSession(); return () => { // Pass the refs to the utility function here stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); }; }, []); // New useEffect hook to automatically scroll to the bottom of the chat history // The fix: `chatContainerRef` is now included in the dependency array. useEffect(() => { if (chatContainerRef.current) { chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight; } }, [chatHistory, chatContainerRef]); const addMessage = (text, isUser) => { setChatHistory((prevHistory) => [...prevHistory, { text, isUser }]); }; /** * Plays a stream of audio chunks using the Web Audio API by fetching them from the API. * This is the orchestrator that uses the stateless streamSpeech API function. * @param {string} text - The text to be synthesized by the TTS service. */ const playStreamingAudio = async (text) => { setIsBusy(true); setStatus("Streaming audio..."); // Pass the refs to the utility function stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef); try { if (!audioContextRef.current) { audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)(); playbackTimeRef.current = audioContextRef.current.currentTime; } const audioContext = audioContextRef.current; const onChunkReceived = (rawFloat32Data) => { // This is the callback that receives processed audio data from apiService. // It's responsible for using the Web Audio API to play the sound. const float32Resampled = resampleBuffer( rawFloat32Data, 24000, // The model's sample rate is hardcoded to 24000 audioContext.sampleRate ); const audioBuffer = audioContext.createBuffer( 1, float32Resampled.length, audioContext.sampleRate ); audioBuffer.copyToChannel(float32Resampled, 0); const source = audioContext.createBufferSource(); source.buffer = audioBuffer; source.connect(audioContext.destination); const currentTime = audioContext.currentTime; const startTime = Math.max(playbackTimeRef.current, currentTime); source.start(startTime); playbackTimeRef.current = startTime + audioBuffer.duration; playingSourcesRef.current.push(source); source.onended = () => { playingSourcesRef.current = playingSourcesRef.current.filter( (s) => s !== source ); }; }; const onStreamDone = () => { // This callback is triggered when the stream finishes. console.log("TTS Stream complete."); }; // Call the stateless API function, passing the UI-related callbacks await streamSpeech(text, onChunkReceived, onStreamDone); } catch (err) { console.error("Failed to stream speech:", err); setStatus(`Error: Failed to stream speech. ${err.message}`); setErrorMessage(`Failed to stream speech: ${err.message}`); setShowErrorModal(true); } finally { setIsBusy(false); lastRequestTimeRef.current = Date.now(); if (isAutoMode && isAutoListening) { setStatus("Listening for voice..."); } else if (!isAutoMode) { setStatus("Click the microphone to start recording."); } else { setStatus("Click to start listening."); } } }; const processConversation = async (audioBlob) => { console.log("Processing conversation..."); try { const audioDuration = audioBlob.size / (48000 * 2 * 1) * 1000; if (audioDuration < MINIMUM_AUDIO_DURATION_MS) { console.log(`Audio too short (${audioDuration.toFixed(2)}ms), skipping.`); setStatus("Audio was too short. Please speak a little longer."); lastRequestTimeRef.current = Date.now(); return; } if (audioBlob.size === 0) { console.warn("Audio blob is empty, skipping STT API call."); setStatus("Recording stopped, but no audio was captured. Please try again."); lastRequestTimeRef.current = Date.now(); return; } setStatus("Transcribing audio..."); const userText = await transcribeAudio(audioBlob); addMessage(userText, true); setStatus("AI is thinking..."); const aiText = await chatWithAI(sessionId, userText); addMessage(aiText, false); await playStreamingAudio(aiText); } catch (error) { console.error("Conversation processing failed:", error); setStatus(`Error: ${error.message}`); setErrorMessage(`An error occurred: ${error.message}`); setShowErrorModal(true); } finally { setIsBusy(false); lastRequestTimeRef.current = Date.now(); // This is the main correction: only stop streams if not in auto-listening mode if (!isAutoMode) { setStatus("Click the microphone to start recording."); stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); } else if (isAutoMode && isAutoListening) { setStatus("Listening for voice..."); } else { setStatus("Click to start listening."); } } }; const startManualRecording = async () => { if (isRecording) return; try { if (!sessionId) { setErrorMessage("Please wait for the chat session to be initialized."); setShowErrorModal(true); return; } const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); streamRef.current = stream; mediaRecorderRef.current = new MediaRecorder(stream); mediaRecorderRef.current.start(); audioChunksRef.current = []; mediaRecorderRef.current.ondataavailable = (event) => { audioChunksRef.current.push(event.data); }; mediaRecorderRef.current.onstop = async () => { if (streamRef.current) { streamRef.current.getTracks().forEach(track => track.stop()); streamRef.current = null; } mediaRecorderRef.current = null; const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" }); await processConversation(audioBlob); }; setIsRecording(true); isRecordingRef.current = true; setStatus("Recording... Click to stop."); } catch (err) { console.error("Error accessing microphone:", err); setStatus("Error: Cannot access microphone."); setErrorMessage("Microphone access has been denied. Please enable it."); setShowErrorModal(true); } }; const stopManualRecording = () => { if (mediaRecorderRef.current?.state === "recording") { setIsBusy(true); setIsRecording(false); mediaRecorderRef.current.stop(); } }; const startAutoListening = async () => { try { if (!sessionId) { setErrorMessage("Please wait for the chat session to be initialized."); setShowErrorModal(true); return; } const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); vadStreamRef.current = stream; const audioContext = new (window.AudioContext || window.webkitAudioContext)(); const source = audioContext.createMediaStreamSource(stream); const bufferSize = 4096; const scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1); scriptProcessorRef.current = scriptProcessor; source.connect(scriptProcessor); scriptProcessor.connect(audioContext.destination); scriptProcessor.onaudioprocess = (event) => { const inputBuffer = event.inputBuffer.getChannelData(0); let sum = 0.0; for (let i = 0; i < inputBuffer.length; i++) { sum += inputBuffer[i] * inputBuffer[i]; } const volume = Math.sqrt(sum / inputBuffer.length); const isVoiceDetected = volume > VAD_THRESHOLD; const timeSinceLastRequest = Date.now() - lastRequestTimeRef.current; const isCooldownPassed = timeSinceLastRequest > AUTO_MODE_COOLDOWN_MS; if (isVoiceDetected) { if (silenceTimeoutRef.current) { clearTimeout(silenceTimeoutRef.current); silenceTimeoutRef.current = null; } if (!isRecordingRef.current && isCooldownPassed) { startAutoRecording(stream); } } else if (isRecordingRef.current) { if (!silenceTimeoutRef.current) { silenceTimeoutRef.current = setTimeout(() => { stopAutoRecording(); }, VAD_SILENCE_DURATION); } } }; setIsAutoListening(true); setStatus("Listening for voice..."); } catch (err) { console.error("Error accessing microphone for VAD:", err); setStatus("Error: Cannot access microphone."); setErrorMessage("Microphone access has been denied. Please enable it."); setShowErrorModal(true); } }; const stopAutoListening = () => { setIsAutoListening(false); // Pass the refs here to the utility function stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); setStatus("Click to start listening."); }; const startAutoRecording = (stream) => { if (mediaRecorderRef.current?.state === "recording") return; mediaRecorderRef.current = new MediaRecorder(stream); audioChunksRef.current = []; mediaRecorderRef.current.ondataavailable = (event) => { audioChunksRef.current.push(event.data); }; mediaRecorderRef.current.onstop = async () => { isRecordingRef.current = false; setIsRecording(false); if (audioChunksRef.current.length > 0) { setIsBusy(true); setStatus("Transcribing audio..."); const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" }); await processConversation(audioBlob); } else { setIsBusy(false); setStatus("Listening for voice..."); } }; mediaRecorderRef.current.start(); isRecordingRef.current = true; setIsRecording(true); setStatus("Recording..."); }; const stopAutoRecording = () => { if (mediaRecorderRef.current?.state === "recording") { mediaRecorderRef.current.stop(); } }; const handleMicClick = () => { // Correctly pass the refs to the utility function stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef); if (isBusy) return; if (isAutoMode) { if (isAutoListening) { stopAutoListening(); } else { startAutoListening(); } } else { if (isRecording) { stopManualRecording(); } else { startManualRecording(); } } }; return { chatHistory, status, isBusy, isRecording, isAutoMode, isAutoListening, sessionId, showErrorModal, errorMessage, setIsAutoMode, handleMicClick, setShowErrorModal, }; }; export default useVoiceChat;