// src/hooks/useVoiceChat.js
// Custom React hook containing all the stateful logic and side effects
// for the voice chat application.
import { useState, useRef, useEffect } from "react";
import {
createSession,
transcribeAudio,
chatWithAI,
streamSpeech,
} from "../services/apiService";
import {
stopAllPlayingAudio,
stopAllMediaStreams,
resampleBuffer,
} from "../services/audioUtils";
// Constants for Voice Activity Detection and timing
const VAD_THRESHOLD = 0.01; // RMS volume above which a frame counts as voice
const VAD_SILENCE_DURATION = 2500; // ms of continuous silence before auto-stop
const MINIMUM_AUDIO_DURATION_MS = 500; // recordings shorter than this are discarded
const AUTO_MODE_COOLDOWN_MS = 3000; // pause after each turn before auto mode re-triggers
const useVoiceChat = ({ chatContainerRef }) => {
const [chatHistory, setChatHistory] = useState([
{
text: "Hello! I'm an AI assistant. How can I help you today?",
isUser: false,
},
]);
const [status, setStatus] = useState("Click the microphone to start recording.");
const [isBusy, setIsBusy] = useState(false);
const [isRecording, setIsRecording] = useState(false);
const [showErrorModal, setShowErrorModal] = useState(false);
const [errorMessage, setErrorMessage] = useState("");
const [sessionId, setSessionId] = useState(null);
const [isAutoMode, setIsAutoMode] = useState(false);
const [isAutoListening, setIsAutoListening] = useState(false);
  // All refs must be declared here, inside the custom hook.
  const mediaRecorderRef = useRef(null); // active MediaRecorder instance
  const audioChunksRef = useRef([]); // recorded chunks for the current utterance
  const audioContextRef = useRef(null); // shared AudioContext for TTS playback
  const playbackTimeRef = useRef(0); // when the next TTS chunk should start playing
  const isRecordingRef = useRef(false); // mirrors isRecording for use inside audio callbacks
  const playingSourcesRef = useRef([]); // AudioBufferSourceNodes currently playing
  const vadStreamRef = useRef(null); // microphone stream used for voice activity detection
  const scriptProcessorRef = useRef(null); // ScriptProcessorNode driving the VAD loop
  const silenceTimeoutRef = useRef(null); // timer that stops recording after sustained silence
  const lastRequestTimeRef = useRef(0); // timestamp of the last completed turn, for the cooldown
  const streamRef = useRef(null); // microphone stream used for manual recording
// --- Initial Session Creation Effect ---
useEffect(() => {
const startSession = async () => {
setIsBusy(true);
setStatus("Starting new chat session...");
try {
const session = await createSession();
setSessionId(session.id);
console.log(`Session created with ID: ${session.id}`);
setStatus("Click the microphone to start recording.");
} catch (err) {
console.error("Error creating session:", err);
setStatus(`Error: Could not start session. ${err.message}`);
setErrorMessage(`Failed to create a chat session: ${err.message}`);
setShowErrorModal(true);
} finally {
setIsBusy(false);
}
};
startSession();
return () => {
      // Release any active microphone streams and audio nodes on unmount.
stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
};
}, []);
  // Auto-scroll the chat container to the bottom whenever the history changes.
  // chatContainerRef is included in the dependency array to satisfy the
  // exhaustive-deps rule; refs are stable, so it never re-triggers the effect.
useEffect(() => {
if (chatContainerRef.current) {
chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight;
}
}, [chatHistory, chatContainerRef]);
const addMessage = (text, isUser) => {
setChatHistory((prevHistory) => [...prevHistory, { text, isUser }]);
};
  /**
   * Streams synthesized speech for the given text and plays the chunks as they
   * arrive using the Web Audio API. This is the orchestrator around the
   * stateless streamSpeech API function.
   * @param {string} text - The text to be synthesized by the TTS service.
   */
const playStreamingAudio = async (text) => {
setIsBusy(true);
setStatus("Streaming audio...");
    // Interrupt any response audio that is still playing from a previous turn.
stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef);
try {
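      // Lazily create a single AudioContext and reuse it for all TTS playback.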
if (!audioContextRef.current) {
audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)();
playbackTimeRef.current = audioContextRef.current.currentTime;
}
const audioContext = audioContextRef.current;
const onChunkReceived = (rawFloat32Data) => {
// This is the callback that receives processed audio data from apiService.
// It's responsible for using the Web Audio API to play the sound.
        // The TTS model outputs audio at a fixed 24 kHz; resample it to the
        // AudioContext's native sample rate before playback.
        const float32Resampled = resampleBuffer(
          rawFloat32Data,
          24000,
          audioContext.sampleRate
        );
const audioBuffer = audioContext.createBuffer(
1,
float32Resampled.length,
audioContext.sampleRate
);
audioBuffer.copyToChannel(float32Resampled, 0);
const source = audioContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(audioContext.destination);
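        // Schedule this chunk to start when the previous one ends (or immediately
        // if playback has fallen behind) so the chunks play back gaplessly.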
const currentTime = audioContext.currentTime;
const startTime = Math.max(playbackTimeRef.current, currentTime);
source.start(startTime);
playbackTimeRef.current = startTime + audioBuffer.duration;
playingSourcesRef.current.push(source);
source.onended = () => {
playingSourcesRef.current = playingSourcesRef.current.filter(
(s) => s !== source
);
};
};
const onStreamDone = () => {
// This callback is triggered when the stream finishes.
console.log("TTS Stream complete.");
};
// Call the stateless API function, passing the UI-related callbacks
await streamSpeech(text, onChunkReceived, onStreamDone);
} catch (err) {
console.error("Failed to stream speech:", err);
setStatus(`Error: Failed to stream speech. ${err.message}`);
setErrorMessage(`Failed to stream speech: ${err.message}`);
setShowErrorModal(true);
} finally {
setIsBusy(false);
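      // Stamp the end of this turn so auto mode waits out the cooldown before re-triggering.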
lastRequestTimeRef.current = Date.now();
if (isAutoMode && isAutoListening) {
setStatus("Listening for voice...");
} else if (!isAutoMode) {
setStatus("Click the microphone to start recording.");
} else {
setStatus("Click to start listening.");
}
}
};
const processConversation = async (audioBlob) => {
console.log("Processing conversation...");
try {
      if (audioBlob.size === 0) {
        console.warn("Audio blob is empty, skipping STT API call.");
        setStatus("Recording stopped, but no audio was captured. Please try again.");
        lastRequestTimeRef.current = Date.now();
        return;
      }
      // Rough duration estimate assuming raw 48 kHz, 16-bit, mono PCM. MediaRecorder
      // output is typically compressed, so this is only a heuristic used to reject
      // obviously too-short recordings.
      const audioDuration = (audioBlob.size / (48000 * 2 * 1)) * 1000;
      if (audioDuration < MINIMUM_AUDIO_DURATION_MS) {
        console.log(`Audio too short (${audioDuration.toFixed(2)}ms), skipping.`);
        setStatus("Audio was too short. Please speak a little longer.");
        lastRequestTimeRef.current = Date.now();
        return;
      }
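      // One conversational turn: transcribe the user's audio, get the AI reply, then speak it.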
setStatus("Transcribing audio...");
const userText = await transcribeAudio(audioBlob);
addMessage(userText, true);
setStatus("AI is thinking...");
const aiText = await chatWithAI(sessionId, userText);
addMessage(aiText, false);
await playStreamingAudio(aiText);
} catch (error) {
console.error("Conversation processing failed:", error);
setStatus(`Error: ${error.message}`);
setErrorMessage(`An error occurred: ${error.message}`);
setShowErrorModal(true);
} finally {
setIsBusy(false);
lastRequestTimeRef.current = Date.now();
      // Only tear down the media streams in manual mode; in auto mode the
      // VAD stream must stay open so listening can continue.
      if (!isAutoMode) {
        setStatus("Click the microphone to start recording.");
        stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
} else if (isAutoMode && isAutoListening) {
setStatus("Listening for voice...");
} else {
setStatus("Click to start listening.");
}
}
};
const startManualRecording = async () => {
if (isRecording) return;
try {
if (!sessionId) {
setErrorMessage("Please wait for the chat session to be initialized.");
setShowErrorModal(true);
return;
}
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
streamRef.current = stream;
      mediaRecorderRef.current = new MediaRecorder(stream);
      audioChunksRef.current = [];
      mediaRecorderRef.current.ondataavailable = (event) => {
        audioChunksRef.current.push(event.data);
      };
      mediaRecorderRef.current.onstop = async () => {
        if (streamRef.current) {
          streamRef.current.getTracks().forEach(track => track.stop());
          streamRef.current = null;
        }
        mediaRecorderRef.current = null;
        // Note: MediaRecorder typically encodes to WebM/Opus, so the blob's actual
        // contents may not match the "audio/wav" label used here.
        const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" });
        await processConversation(audioBlob);
      };
      // Attach handlers before starting so no data events can be missed.
      mediaRecorderRef.current.start();
setIsRecording(true);
isRecordingRef.current = true;
setStatus("Recording... Click to stop.");
} catch (err) {
console.error("Error accessing microphone:", err);
setStatus("Error: Cannot access microphone.");
setErrorMessage("Microphone access has been denied. Please enable it.");
setShowErrorModal(true);
}
};
const stopManualRecording = () => {
if (mediaRecorderRef.current?.state === "recording") {
setIsBusy(true);
setIsRecording(false);
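      // stop() fires the onstop handler above, which assembles the blob and starts processing.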
mediaRecorderRef.current.stop();
}
};
const startAutoListening = async () => {
try {
if (!sessionId) {
setErrorMessage("Please wait for the chat session to be initialized.");
setShowErrorModal(true);
return;
}
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
vadStreamRef.current = stream;
      const audioContext = new (window.AudioContext || window.webkitAudioContext)();
      const source = audioContext.createMediaStreamSource(stream);
      // ScriptProcessorNode is deprecated in favor of AudioWorklet, but it is
      // still widely supported and keeps the VAD loop simple.
      const bufferSize = 4096;
      const scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
      scriptProcessorRef.current = scriptProcessor;
      source.connect(scriptProcessor);
      // Connecting the processor to the destination keeps onaudioprocess firing
      // reliably; the node outputs silence, so nothing audible is played.
      scriptProcessor.connect(audioContext.destination);
scriptProcessor.onaudioprocess = (event) => {
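        // Simple energy-based VAD: compute the RMS of the frame and compare it to a fixed threshold.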
const inputBuffer = event.inputBuffer.getChannelData(0);
let sum = 0.0;
for (let i = 0; i < inputBuffer.length; i++) {
sum += inputBuffer[i] * inputBuffer[i];
}
const volume = Math.sqrt(sum / inputBuffer.length);
const isVoiceDetected = volume > VAD_THRESHOLD;
const timeSinceLastRequest = Date.now() - lastRequestTimeRef.current;
const isCooldownPassed = timeSinceLastRequest > AUTO_MODE_COOLDOWN_MS;
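        // Voice detected: cancel any pending silence timer and, once the cooldown has
        // passed, start recording. Sustained silence while recording arms a timer that
        // stops the recording after VAD_SILENCE_DURATION ms.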
if (isVoiceDetected) {
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
silenceTimeoutRef.current = null;
}
if (!isRecordingRef.current && isCooldownPassed) {
startAutoRecording(stream);
}
} else if (isRecordingRef.current) {
if (!silenceTimeoutRef.current) {
silenceTimeoutRef.current = setTimeout(() => {
stopAutoRecording();
}, VAD_SILENCE_DURATION);
}
}
};
setIsAutoListening(true);
setStatus("Listening for voice...");
} catch (err) {
console.error("Error accessing microphone for VAD:", err);
setStatus("Error: Cannot access microphone.");
setErrorMessage("Microphone access has been denied. Please enable it.");
setShowErrorModal(true);
}
};
const stopAutoListening = () => {
setIsAutoListening(false);
    // Tear down the VAD stream, recorder, and script processor.
stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
setStatus("Click to start listening.");
};
const startAutoRecording = (stream) => {
if (mediaRecorderRef.current?.state === "recording") return;
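    // Record from the same stream the VAD is already monitoring, so no new
    // getUserMedia call is needed.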
mediaRecorderRef.current = new MediaRecorder(stream);
audioChunksRef.current = [];
mediaRecorderRef.current.ondataavailable = (event) => {
audioChunksRef.current.push(event.data);
};
mediaRecorderRef.current.onstop = async () => {
isRecordingRef.current = false;
setIsRecording(false);
if (audioChunksRef.current.length > 0) {
setIsBusy(true);
setStatus("Transcribing audio...");
const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" });
await processConversation(audioBlob);
} else {
setIsBusy(false);
setStatus("Listening for voice...");
}
};
mediaRecorderRef.current.start();
isRecordingRef.current = true;
setIsRecording(true);
setStatus("Recording...");
};
const stopAutoRecording = () => {
if (mediaRecorderRef.current?.state === "recording") {
mediaRecorderRef.current.stop();
}
};
  const handleMicClick = () => {
    // Interrupt any response audio that is still playing.
    stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef);
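    // Ignore clicks while a previous turn is still being processed.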
if (isBusy) return;
if (isAutoMode) {
if (isAutoListening) {
stopAutoListening();
} else {
startAutoListening();
}
} else {
if (isRecording) {
stopManualRecording();
} else {
startManualRecording();
}
}
};
return {
chatHistory,
status,
isBusy,
isRecording,
isAutoMode,
isAutoListening,
sessionId,
showErrorModal,
errorMessage,
setIsAutoMode,
handleMicClick,
setShowErrorModal,
};
};
export default useVoiceChat;