diff --git a/frontend/src/App.js b/frontend/src/App.js index 685c734..d3243f7 100644 --- a/frontend/src/App.js +++ b/frontend/src/App.js @@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react"; import Navbar from "./components/Navbar"; import HomePage from "./pages/HomePage"; -import VoiceChatPage from "./pages/VoiceChatPage"; +import { VoiceChatPage } from "./features/voice"; import SwarmControlPage from "./pages/SwarmControlPage"; import LoginPage from "./pages/LoginPage"; import SettingsPage from "./pages/SettingsPage"; diff --git a/frontend/src/components/VoiceControls.js b/frontend/src/components/VoiceControls.js deleted file mode 100644 index 984b079..0000000 --- a/frontend/src/components/VoiceControls.js +++ /dev/null @@ -1,67 +0,0 @@ -// src/components/VoiceControls.js -import React from "react"; -import { FaMicrophone, FaRegStopCircle } from "react-icons/fa"; - -const Controls = ({ - status, - isBusy, - isRecording, - isAutoMode, - isAutoListening, - onMicClick, - onToggleAutoMode, -}) => { - const micButtonColorClass = isRecording - ? "bg-red-500 hover:bg-red-600 active:bg-red-700 shadow-red-500/20" - : "bg-indigo-600 hover:bg-indigo-700 active:bg-indigo-800 shadow-indigo-500/20"; - - const micButtonState = - isAutoMode && isAutoListening ? isAutoListening : isRecording; - - return ( -
- {/* Status indicator */} -
-
-
- {status || (isBusy ? "Thinking..." : "Ready")} -
-
- -
- {/* Mic Toggle Button */} - - - {/* Auto Mode Toggle */} - -
-
- ); -}; - -export default Controls; \ No newline at end of file diff --git a/frontend/src/features/voice/components/VoiceControls.js b/frontend/src/features/voice/components/VoiceControls.js new file mode 100644 index 0000000..984b079 --- /dev/null +++ b/frontend/src/features/voice/components/VoiceControls.js @@ -0,0 +1,67 @@ +// src/features/voice/components/VoiceControls.js +import React from "react"; +import { FaMicrophone, FaRegStopCircle } from "react-icons/fa"; + +const Controls = ({ + status, + isBusy, + isRecording, + isAutoMode, + isAutoListening, + onMicClick, + onToggleAutoMode, +}) => { + const micButtonColorClass = isRecording + ? "bg-red-500 hover:bg-red-600 active:bg-red-700 shadow-red-500/20" + : "bg-indigo-600 hover:bg-indigo-700 active:bg-indigo-800 shadow-indigo-500/20"; + + const micButtonState = + isAutoMode && isAutoListening ? isAutoListening : isRecording; + + return ( +
+ {/* Status indicator */} +
+
+
+ {status || (isBusy ? "Thinking..." : "Ready")} +
+
+ +
+ {/* Mic Toggle Button */} + + + {/* Auto Mode Toggle */} + +
+
+ ); +}; + +export default Controls; \ No newline at end of file diff --git a/frontend/src/features/voice/hooks/useVoiceChat.js b/frontend/src/features/voice/hooks/useVoiceChat.js new file mode 100644 index 0000000..7ea7154 --- /dev/null +++ b/frontend/src/features/voice/hooks/useVoiceChat.js @@ -0,0 +1,791 @@ +// src/features/voice/hooks/useVoiceChat.js + +// This file is a custom React hook that contains all the stateful logic +// and side effects for the voice chat application. + +import { useState, useRef, useEffect, useCallback } from "react"; +import { + createSession, + transcribeAudio, + chatWithAI, + streamSpeech, + getSessionMessages, + getSessionTokenStatus, + getUserConfig, + getSession, + uploadMessageAudio, + fetchMessageAudio +} from "../../../services/apiService"; +import { getSessionId } from "../../../services/websocket"; +import { + stopAllPlayingAudio, + stopAllMediaStreams, + resampleBuffer, + encodeWAV, +} from "../../../services/audioUtils"; + +// Constants for Voice Activity Detection and timing +const VAD_THRESHOLD = 0.01; +const VAD_SILENCE_DURATION = 2500; +const MINIMUM_AUDIO_DURATION_MS = 500; +const AUTO_MODE_COOLDOWN_MS = 3000; + +const useVoiceChat = ({ chatContainerRef }) => { + const [chatHistory, setChatHistory] = useState([]); + const [status, setStatus] = useState("Click the microphone to start recording."); + const [isBusy, setIsBusy] = useState(false); + const [isRecording, setIsRecording] = useState(false); + const [showErrorModal, setShowErrorModal] = useState(false); + const [errorMessage, setErrorMessage] = useState(""); + const [sessionId, setSessionId] = useState(null); + const [isAutoMode, setIsAutoMode] = useState(false); + const [isAutoListening, setIsAutoListening] = useState(false); + const [tokenUsage, setTokenUsage] = useState({ token_count: 0, token_limit: 0, percentage: 0 }); + const [isConfigured, setIsConfigured] = useState(true); + const [missingConfigs, setMissingConfigs] = useState([]); + const [userConfigData, 
setUserConfigData] = useState(null); + const [localActivePrefs, setLocalActivePrefs] = useState({ llm: '', tts: '', stt: '' }); + + // All refs must be declared here, inside the custom hook. + const sessionIdRef = useRef(null); + const mediaRecorderRef = useRef(null); + const audioChunksRef = useRef([]); + const audioContextRef = useRef(null); + const playbackTimeRef = useRef(0); + const isRecordingRef = useRef(false); + const isBusyRef = useRef(false); + const playingSourcesRef = useRef([]); + const vadStreamRef = useRef(null); + const scriptProcessorRef = useRef(null); + const silenceTimeoutRef = useRef(null); + const lastRequestTimeRef = useRef(0); + const streamRef = useRef(null); + + const [isStreamingPlaying, setIsStreamingPlaying] = useState(false); + + // Helper to keep ref and state in sync + const setBusy = (val) => { + setIsBusy(val); + isBusyRef.current = val; + }; + + /** + * Stops any currently playing streaming audio. + */ + const stopStreamingPlayback = useCallback(() => { + stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef); + setIsStreamingPlaying(false); + setBusy(false); + }, []); + + const fetchTokenUsage = useCallback(async () => { + if (!sessionIdRef.current) return; + try { + const usage = await getSessionTokenStatus(sessionIdRef.current); + setTokenUsage(usage); + } catch (err) { + console.warn("Failed to fetch voice token usage", err); + } + }, []); + + // --- Initial Session Creation Effect --- + useEffect(() => { + const startSession = async () => { + setBusy(true); + setStatus("Loading chat session..."); + try { + let configDataToUse = null; + let providerToUse = "gemini"; + try { + configDataToUse = await getUserConfig(); + setUserConfigData(configDataToUse); + if (configDataToUse.effective?.llm?.active_provider) { + providerToUse = configDataToUse.effective.llm.active_provider; + } + } catch (e) { + console.warn("Could not load user config for Voice", e); + } + + const currentSessionId = await 
getSessionId("voice_chat", providerToUse); + setSessionId(currentSessionId); + sessionIdRef.current = currentSessionId; + + // Try to load chat history + try { + const messagesData = await getSessionMessages(currentSessionId); + if (messagesData && messagesData.messages && messagesData.messages.length > 0) { + const formattedHistoryPromises = messagesData.messages.map(async (msg) => { + let audioBlob = null; + if (msg.has_audio) { + try { + audioBlob = await fetchMessageAudio(msg.id); + } catch (e) { + console.warn(`Failed to fetch audio for message ${msg.id}`, e); + } + } + return { + id: msg.id, + isUser: msg.sender === "user", + text: msg.content, + reasoning: msg.reasoning_content, + timestamp: msg.created_at, + + isFromHistory: true, + audioBlob: audioBlob + }; + }); + const formattedHistory = await Promise.all(formattedHistoryPromises); + setChatHistory(formattedHistory); + } + } catch (historyErr) { + console.warn("Failed to load voice chat history", historyErr); + } + + // Load initial tokens + await fetchTokenUsage(); + + // Check if configuration is fully populated + try { + const eff = configDataToUse?.effective || {}; + + let sessionLlmProvider = providerToUse; + try { + const currentSession = await getSession(currentSessionId); + if (currentSession && currentSession.provider_name) { + sessionLlmProvider = currentSession.provider_name; + } + } catch (sessionErr) { + console.warn("Could not fetch session details", sessionErr); + } + + setLocalActivePrefs({ + llm: sessionLlmProvider, + tts: eff.tts?.active_provider || '', + stt: eff.stt?.active_provider || '' + }); + + const missing = []; + + const llmProviders = eff.llm?.providers || {}; + const hasLLMKey = Object.values(llmProviders).some(p => p.api_key && p.api_key !== 'None'); + if (!hasLLMKey) missing.push("Language Model (LLM) API Key"); + + const ttsProviders = eff.tts?.providers || {}; + const hasTTSKey = Object.values(ttsProviders).some(p => p.api_key && p.api_key !== 'None'); + if (!hasTTSKey) 
missing.push("Text-to-Speech (TTS) API Key"); + + const sttProviders = eff.stt?.providers || {}; + const hasSTTKey = Object.values(sttProviders).some(p => p.api_key && p.api_key !== 'None'); + if (!hasSTTKey) missing.push("Speech-to-Text (STT) API Key"); + + if (missing.length > 0) { + setIsConfigured(false); + setMissingConfigs(missing); + setStatus(`Feature not usable. Missing: ${missing.join(', ')}`); + } else { + setIsConfigured(true); + setMissingConfigs([]); + setStatus("Click the microphone to start recording."); + } + } catch (configErr) { + console.warn("Failed to load user config", configErr); + setIsConfigured(false); + setMissingConfigs(["Failed to load complete configuration"]); + } + + console.log(`Voice Session loaded with ID: ${currentSessionId}`); + } catch (err) { + console.error("Error creating session:", err); + setStatus(`Error: Could not start session. ${err.message}`); + setErrorMessage(`Failed to create a chat session: ${err.message}`); + setShowErrorModal(true); + } finally { + setIsBusy(false); + } + }; + startSession(); + + return () => { + // Pass the refs to the utility function here + stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); + stopStreamingPlayback(); + }; + }, [stopStreamingPlayback]); + + // New useEffect hook to automatically scroll to the bottom of the chat history + // The fix: `chatContainerRef` is now included in the dependency array. + useEffect(() => { + if (chatContainerRef.current) { + chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight; + } + }, [chatHistory, chatContainerRef]); + + const addMessage = (text, isUser, id = null) => { + setChatHistory((prevHistory) => [...prevHistory, { + text, + isUser, + id, + timestamp: new Date().toISOString() + }]); + }; + + /** + * Plays a stream of audio chunks using the Web Audio API by fetching them from the API. + * This is the orchestrator that uses the stateless streamSpeech API function. 
+ * @param {string} text - The text to be synthesized by the TTS service. + * @param {number} messageId - The ID of the message to associate the audio with. + */ + const playStreamingAudio = async (text, messageId = null) => { + setBusy(true); + setIsStreamingPlaying(true); + setStatus("Streaming audio..."); + + // Stop any existing playback + stopStreamingPlayback(); + setBusy(true); // stopStreamingPlayback sets it to false, we want it true during this process + + // Track chunks to store in history + const accumulatedChunks = []; + + try { + if (!audioContextRef.current) { + audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)(); + playbackTimeRef.current = audioContextRef.current.currentTime; + } + + const audioContext = audioContextRef.current; + + const onChunkReceived = (rawFloat32Data, totalChunks, currentChunkIndex) => { + // Collect for storage + accumulatedChunks.push(new Float32Array(rawFloat32Data)); + + // Update UI progress + if (totalChunks > 0) { + const progress = Math.min(Math.round((currentChunkIndex / totalChunks) * 100), 100); + setChatHistory(prev => { + const next = [...prev]; + for (let i = next.length - 1; i >= 0; i--) { + if (!next[i].isUser && !next[i].audioBlob) { + next[i].audioProgress = progress; + break; + } + } + return next; + }); + } + const float32Resampled = resampleBuffer( + rawFloat32Data, + 24000, + audioContext.sampleRate + ); + const audioBuffer = audioContext.createBuffer( + 1, + float32Resampled.length, + audioContext.sampleRate + ); + audioBuffer.copyToChannel(float32Resampled, 0); + + // Apply a very short fade-in and fade-out (2ms) to eliminate "clicks" at segment boundaries + const fadeSamps = Math.floor(audioContext.sampleRate * 0.002); + const chanData = audioBuffer.getChannelData(0); + if (chanData.length > fadeSamps * 2) { + for (let i = 0; i < fadeSamps; i++) { + chanData[i] *= (i / fadeSamps); + chanData[chanData.length - 1 - i] *= (i / fadeSamps); + } + } + + const source = 
audioContext.createBufferSource(); + source.buffer = audioBuffer; + source.connect(audioContext.destination); + + const currentTime = audioContext.currentTime; + const startTime = Math.max(playbackTimeRef.current, currentTime); + + source.start(startTime); + playbackTimeRef.current = startTime + audioBuffer.duration; + + playingSourcesRef.current.push(source); + source.onended = () => { + playingSourcesRef.current = playingSourcesRef.current.filter( + (s) => s !== source + ); + }; + }; + + const onStreamDone = async () => { + console.log("TTS Stream complete."); + setIsStreamingPlaying(false); + if (accumulatedChunks.length > 0) { + // Concatenate all chunks and save the blob + const totalLen = accumulatedChunks.reduce((acc, c) => acc + c.length, 0); + const result = new Float32Array(totalLen); + let offset = 0; + for (const c of accumulatedChunks) { + result.set(c, offset); + offset += c.length; + } + // resample to standard 44.1k for download/blob stability + const finalPcm = resampleBuffer(result, 24000, 44100); + const wavBlob = encodeWAV(finalPcm, 44100); + + // Upload to persistent storage if messageId is available + if (messageId) { + try { + console.log(`Uploading audio for message ${messageId}...`); + await uploadMessageAudio(messageId, wavBlob); + } catch (uploadErr) { + console.warn("Failed to upload persistent audio", uploadErr); + } + } + + // Post-update: find the last AI message and attach this blob + setChatHistory(prev => { + const next = [...prev]; + // Find the latest assistant message that matches this text (or just the latest) + for (let i = next.length - 1; i >= 0; i--) { + if (!next[i].isUser && !next[i].audioBlob) { + next[i].audioBlob = wavBlob; + if (messageId) next[i].id = messageId; + break; + } + } + return next; + }); + } + }; + + if (!text || !text.trim()) { + console.warn("No text to synthesize, skipping audio playback."); + return; + } + + await streamSpeech(text, onChunkReceived, onStreamDone, localActivePrefs.tts); + + } catch 
(err) { + console.error("Failed to stream speech:", err); + setIsStreamingPlaying(false); + setStatus(`Error: Failed to stream speech. ${err.message}`); + setErrorMessage(`Failed to stream speech: ${err.message}`); + setShowErrorModal(true); + } finally { + setBusy(false); + lastRequestTimeRef.current = Date.now(); + if (isAutoMode && isAutoListening) { + setStatus("Listening for voice..."); + } else if (!isAutoMode) { + setStatus("Click the microphone to start recording."); + } else { + setStatus("Click to start listening."); + } + } + }; + + /** + * Specifically for manual replay/synthesis of any message (including history) + */ + const synthesizeMessageAudio = async (index, text) => { + if (isBusy) return; + const accumulatedChunks = []; + + if (chatHistory[index]?.audioBlob) return; + + setIsBusy(true); + try { + if (!audioContextRef.current) { + audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)(); + playbackTimeRef.current = audioContextRef.current.currentTime; + } + const audioContext = audioContextRef.current; + + const onData = (rawFloat32Data, total, current) => { + accumulatedChunks.push(new Float32Array(rawFloat32Data)); + if (total > 0) { + const progress = Math.min(Math.round((current / total) * 100), 100); + setChatHistory(prev => { + const next = [...prev]; + if (next[index]) next[index].audioProgress = progress; + return next; + }); + } + + const float32Resampled = resampleBuffer(rawFloat32Data, 24000, audioContext.sampleRate); + const audioBuffer = audioContext.createBuffer(1, float32Resampled.length, audioContext.sampleRate); + audioBuffer.copyToChannel(float32Resampled, 0); + const source = audioContext.createBufferSource(); + source.buffer = audioBuffer; + source.connect(audioContext.destination); + const startTime = Math.max(playbackTimeRef.current, audioContext.currentTime); + source.start(startTime); + playbackTimeRef.current = startTime + audioBuffer.duration; + playingSourcesRef.current.push(source); + }; + + 
const onDone = async () => { + if (accumulatedChunks.length > 0) { + const totalLen = accumulatedChunks.reduce((acc, c) => acc + c.length, 0); + const result = new Float32Array(totalLen); + let offset = 0; + for (const c of accumulatedChunks) { + result.set(c, offset); + offset += c.length; + } + const finalPcm = resampleBuffer(result, 24000, 44100); + const wavBlob = encodeWAV(finalPcm, 44100); + + const messageId = chatHistory[index]?.id; + if (messageId) { + try { + await uploadMessageAudio(messageId, wavBlob); + } catch (e) { console.warn("Upload failed during manual synthesis", e); } + } + + setChatHistory(prev => { + const next = [...prev]; + if (next[index]) next[index].audioBlob = wavBlob; + return next; + }); + } + }; + + if (!text || !text.trim()) return; + + await streamSpeech(text, onData, onDone, localActivePrefs.tts); + } catch (err) { + console.error("Manual synthesis failed", err); + } finally { + setIsBusy(false); + } + }; + + const processConversation = async (audioBlob) => { + setBusy(true); + console.log("Processing conversation..."); + try { + const audioDuration = audioBlob.size / (48000 * 2 * 1) * 1000; + if (audioDuration < MINIMUM_AUDIO_DURATION_MS) { + const msg = "Your recording was too short. Please speak for at least half a second."; + console.log(`Audio too short (${audioDuration.toFixed(2)}ms), skipping.`); + setStatus(msg); + if (!isAutoMode) { + setErrorMessage(msg); + setShowErrorModal(true); + } + lastRequestTimeRef.current = Date.now(); + return; + } + if (audioBlob.size === 0) { + const msg = "Recording stopped, but no audio was captured. 
Please try again."; + console.warn("Audio blob is empty, skipping STT API call."); + setStatus(msg); + if (!isAutoMode) { + setErrorMessage(msg); + setShowErrorModal(true); + } + lastRequestTimeRef.current = Date.now(); + return; + } + + setStatus("Transcribing audio..."); + const userText = await transcribeAudio(audioBlob, localActivePrefs.stt); + addMessage(userText, true); + + setStatus("AI is thinking..."); + const aiResponse = await chatWithAI(sessionId, userText, localActivePrefs.llm || "gemini"); + addMessage(aiResponse.answer, false, aiResponse.message_id); + + fetchTokenUsage(); + await playStreamingAudio(aiResponse.answer, aiResponse.message_id); + } catch (error) { + console.error("Conversation processing failed:", error); + setStatus(`Error: ${error.message}`); + setErrorMessage(`An error occurred: ${error.message}`); + setShowErrorModal(true); + } finally { + setBusy(false); + lastRequestTimeRef.current = Date.now(); + // This is the main correction: only stop streams if not in auto-listening mode + if (!isAutoMode) { + setStatus("Click the microphone to start recording."); + stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); + } else if (isAutoMode && isAutoListening) { + setStatus("Listening for voice..."); + } else { + setStatus("Click to start listening."); + } + } + }; + + const startManualRecording = async () => { + if (isRecording) return; + + try { + if (!sessionId) { + setErrorMessage("Please wait for the chat session to be initialized."); + setShowErrorModal(true); + return; + } + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + streamRef.current = stream; + mediaRecorderRef.current = new MediaRecorder(stream); + mediaRecorderRef.current.start(); + audioChunksRef.current = []; + + mediaRecorderRef.current.ondataavailable = (event) => { + audioChunksRef.current.push(event.data); + }; + + mediaRecorderRef.current.onstop = async () => { + if (streamRef.current) { + 
streamRef.current.getTracks().forEach(track => track.stop()); + streamRef.current = null; + } + mediaRecorderRef.current = null; + const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" }); + await processConversation(audioBlob); + }; + setIsRecording(true); + isRecordingRef.current = true; + setStatus("Recording... Click to stop."); + } catch (err) { + console.error("Error accessing microphone:", err); + setStatus("Error: Cannot access microphone."); + setErrorMessage("Microphone access has been denied. Please enable it."); + setShowErrorModal(true); + } + }; + + const stopManualRecording = () => { + if (mediaRecorderRef.current?.state === "recording") { + setBusy(true); + setIsRecording(false); + mediaRecorderRef.current.stop(); + } + }; + + const startAutoListening = async () => { + try { + if (!sessionId) { + setErrorMessage("Please wait for the chat session to be initialized."); + setShowErrorModal(true); + return; + } + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + vadStreamRef.current = stream; + const audioContext = new (window.AudioContext || window.webkitAudioContext)(); + const source = audioContext.createMediaStreamSource(stream); + const bufferSize = 4096; + const scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1); + scriptProcessorRef.current = scriptProcessor; + source.connect(scriptProcessor); + scriptProcessor.connect(audioContext.destination); + + scriptProcessor.onaudioprocess = (event) => { + const inputBuffer = event.inputBuffer.getChannelData(0); + let sum = 0.0; + for (let i = 0; i < inputBuffer.length; i++) { + sum += inputBuffer[i] * inputBuffer[i]; + } + const volume = Math.sqrt(sum / inputBuffer.length); + const isVoiceDetected = volume > VAD_THRESHOLD; + const timeSinceLastRequest = Date.now() - lastRequestTimeRef.current; + const isCooldownPassed = timeSinceLastRequest > AUTO_MODE_COOLDOWN_MS; + + if (isVoiceDetected && !isBusyRef.current) { + if 
(silenceTimeoutRef.current) { + clearTimeout(silenceTimeoutRef.current); + silenceTimeoutRef.current = null; + } + if (!isRecordingRef.current && isCooldownPassed) { + startAutoRecording(stream); + } + } else if (isRecordingRef.current) { + if (!silenceTimeoutRef.current) { + silenceTimeoutRef.current = setTimeout(() => { + stopAutoRecording(); + }, VAD_SILENCE_DURATION); + } + } + }; + setIsAutoListening(true); + setStatus("Listening for voice..."); + } catch (err) { + console.error("Error accessing microphone for VAD:", err); + setStatus("Error: Cannot access microphone."); + setErrorMessage("Microphone access has been denied. Please enable it."); + setShowErrorModal(true); + } + }; + + const stopAutoListening = () => { + setIsAutoListening(false); + // Pass the refs here to the utility function + stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); + setStatus("Click to start listening."); + }; + + const startAutoRecording = (stream) => { + if (mediaRecorderRef.current?.state === "recording") return; + mediaRecorderRef.current = new MediaRecorder(stream); + audioChunksRef.current = []; + mediaRecorderRef.current.ondataavailable = (event) => { + audioChunksRef.current.push(event.data); + }; + mediaRecorderRef.current.onstop = async () => { + isRecordingRef.current = false; + setIsRecording(false); + if (audioChunksRef.current.length > 0) { + setIsBusy(true); + setStatus("Transcribing audio..."); + const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" }); + await processConversation(audioBlob); + } else { + setIsBusy(false); + setStatus("Listening for voice..."); + } + }; + mediaRecorderRef.current.start(); + isRecordingRef.current = true; + setIsRecording(true); + setStatus("Recording..."); + }; + + const stopAutoRecording = () => { + if (mediaRecorderRef.current?.state === "recording") { + mediaRecorderRef.current.stop(); + } + }; + + const handleMicClick = () => { + // Correctly pass the refs to the utility 
function + stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef); + + if (!isConfigured) { + setErrorMessage("Voice Chat requires valid configurations. Please visit Settings to set up your LLM, TTS, and STT providers."); + setShowErrorModal(true); + return; + } + + if (isBusy) return; + + if (isAutoMode) { + if (isAutoListening) { + stopAutoListening(); + } else { + startAutoListening(); + } + } else { + if (isRecording) { + stopManualRecording(); + } else { + startManualRecording(); + } + } + }; + + const handleNewSession = async () => { + setChatHistory([]); + localStorage.removeItem("sessionId_voice_chat"); + + setIsBusy(true); + setStatus("Starting new session..."); + const prefProvider = userConfigData?.effective?.llm?.active_provider || "gemini"; + try { + const newSessionId = await getSessionId("voice_chat", prefProvider, { + stt_provider_name: localActivePrefs.stt || userConfigData?.effective?.stt?.active_provider, + tts_provider_name: localActivePrefs.tts || userConfigData?.effective?.tts?.active_provider + }); + setLocalActivePrefs(prev => ({ ...prev, llm: prefProvider })); + setSessionId(newSessionId); + sessionIdRef.current = newSessionId; + fetchTokenUsage(); + setStatus("Click the microphone to start recording."); + } catch (err) { + console.error("Failed to start new voice session", err); + setStatus("Error creating new session."); + } finally { + setIsBusy(false); + } + }; + + const handleSwitchSession = useCallback(async (targetSessionId) => { + localStorage.setItem("sessionId_voice_chat", targetSessionId); + setSessionId(targetSessionId); + sessionIdRef.current = targetSessionId; + + setChatHistory([]); + setStatus(`Loading session #${targetSessionId}...`); + + try { + // Because we are switching sessions, we MUST sync the settings layout + // back to the new session's provider_name + try { + const switchedSession = await getSession(targetSessionId); + if (switchedSession && switchedSession.provider_name) { + 
setLocalActivePrefs(prev => ({ + ...prev, + llm: switchedSession.provider_name + })); + } + } catch (e) { console.warn("Could not fetch switched session provider", e); } + + const messagesData = await getSessionMessages(targetSessionId); + if (messagesData && messagesData.messages) { + const mappedHistoryPromises = messagesData.messages.map(async (msg) => { + let audioBlob = null; + if (msg.has_audio) { + try { + audioBlob = await fetchMessageAudio(msg.id); + } catch (e) { + console.warn(`Failed to fetch audio for message ${msg.id} during switch`, e); + } + } + return { + id: msg.id, + text: msg.content, + isUser: msg.sender === 'user', + timestamp: msg.created_at, + isFromHistory: true, + audioBlob: audioBlob + }; + }); + const mappedHistory = await Promise.all(mappedHistoryPromises); + setChatHistory(mappedHistory); + } + fetchTokenUsage(); + setStatus(`Click the microphone to start recording.`); + } catch (error) { + console.error("Failed to switch session:", error); + setStatus("Failed to load session history."); + } + }, [fetchTokenUsage]); + + return { + chatHistory, + status, + isBusy, + isRecording, + isAutoMode, + isAutoListening, + sessionId, + showErrorModal, + errorMessage, + tokenUsage, + isConfigured, + missingConfigs, + userConfigData, + localActivePrefs, + setLocalActivePrefs, + setIsAutoMode, + handleMicClick, + handleNewSession, + handleSwitchSession, + setShowErrorModal, + setErrorMessage, + synthesizeMessageAudio, + isStreamingPlaying, + stopStreamingPlayback + }; +}; + +export default useVoiceChat; diff --git a/frontend/src/features/voice/index.js b/frontend/src/features/voice/index.js new file mode 100644 index 0000000..df33c1c --- /dev/null +++ b/frontend/src/features/voice/index.js @@ -0,0 +1,6 @@ +// Feature entry point for voice chat. +// Exports the primary UI and hook implementations for the voice chat feature. 
+ +export { default as VoiceChatPage } from "./pages/VoiceChatPage"; +export { default as useVoiceChat } from "./hooks/useVoiceChat"; +export { default as VoiceControls } from "./components/VoiceControls"; diff --git a/frontend/src/features/voice/pages/VoiceChatPage.js b/frontend/src/features/voice/pages/VoiceChatPage.js new file mode 100644 index 0000000..53eca38 --- /dev/null +++ b/frontend/src/features/voice/pages/VoiceChatPage.js @@ -0,0 +1,277 @@ +import React, { useState, useRef, useEffect } from "react"; +import { useVoiceChat, VoiceControls } from ".."; +import { ChatWindow } from "../../chat"; +import SessionSidebar from "../../../components/SessionSidebar"; +import { updateSession } from "../../../services/apiService"; + +const VoiceChatPage = () => { + const chatContainerRef = useRef(null); + const [showConfigModal, setShowConfigModal] = useState(false); + const [savingConfig, setSavingConfig] = useState(false); + const [sidebarRefreshTick, setSidebarRefreshTick] = useState(0); + + const { + chatHistory, + status, + isRecording, + isBusy, + isAutoMode, + isAutoListening, + showErrorModal, + errorMessage, + tokenUsage, + setIsAutoMode, + handleMicClick, + handleNewSession, + setShowErrorModal, + handleSwitchSession, + sessionId, + isConfigured, + missingConfigs, + userConfigData, + localActivePrefs, + setLocalActivePrefs, + setErrorMessage, + synthesizeMessageAudio, + isStreamingPlaying, + stopStreamingPlayback + } = useVoiceChat({ chatContainerRef }); + + useEffect(() => { + if (chatContainerRef.current) { + chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight; + } + }, [chatHistory]); + + const toggleAutoMode = () => { + setIsAutoMode(!isAutoMode); + }; + + const handleSaveQuickConfig = async () => { + try { + setSavingConfig(true); + if (sessionId && localActivePrefs.llm) { + await updateSession(sessionId, { provider_name: localActivePrefs.llm }); + setSidebarRefreshTick(t => t + 1); + } + setShowConfigModal(false); + } catch (e) 
{ + console.error(e); + setErrorMessage("Failed to apply the session-local providers."); + setShowErrorModal(true); + } finally { + setSavingConfig(false); + } + }; + + return ( +
+ + + {/* Main content area */} +
+
+ {/* Chat Area Box */} +
+
+ + {/* Box Header */} +

+
+
+ + + +
+
+ Voice Chat Assistant + Real-time Conversational AI +
+ {!isConfigured && ( +
+ + + +
+

Missing Key

+
    + {missingConfigs?.map((m, i) =>
  • {m}
  • )} +
+
+
+ )} + +
+
+
+
+ Token Usage +
+
+
+
80 ? 'bg-red-500' : 'bg-indigo-500'}`} + style={{ width: `${Math.min(tokenUsage?.percentage || 0, 100)}%` }} + >
+
+ 80 ? 'text-red-500' : 'text-gray-400'}`}> + {tokenUsage?.percentage || 0}% + +
+
+ +
+

+ + {/* Chat Scroll Area */} +
+ +
+ + {/* Controls Wrapper */} +
+ +
+
+
+
+
+ + {/* Error Modal */} + {showErrorModal && ( +
+
+
+ +
+

Attention Required

+

{errorMessage}

+ +
+
+ )} + + {/* Quick Settings Modal */} + {showConfigModal && ( +
+
+
+

+ + Session Engine +

+ +
+
+
+ + +
+ +
+ + +
+ +
+ + +
+
+
+ + +
+
+
+ )} +
+ ); +}; + +export default VoiceChatPage; \ No newline at end of file diff --git a/frontend/src/hooks/useVoiceChat.js b/frontend/src/hooks/useVoiceChat.js deleted file mode 100644 index 9f13faa..0000000 --- a/frontend/src/hooks/useVoiceChat.js +++ /dev/null @@ -1,791 +0,0 @@ -// src/hooks/useVoiceChat.js - -// This file is a custom React hook that contains all the stateful logic -// and side effects for the voice chat application. - -import { useState, useRef, useEffect, useCallback } from "react"; -import { - createSession, - transcribeAudio, - chatWithAI, - streamSpeech, - getSessionMessages, - getSessionTokenStatus, - getUserConfig, - getSession, - uploadMessageAudio, - fetchMessageAudio -} from "../services/apiService"; -import { getSessionId } from "../services/websocket"; -import { - stopAllPlayingAudio, - stopAllMediaStreams, - resampleBuffer, - encodeWAV, -} from "../services/audioUtils"; - -// Constants for Voice Activity Detection and timing -const VAD_THRESHOLD = 0.01; -const VAD_SILENCE_DURATION = 2500; -const MINIMUM_AUDIO_DURATION_MS = 500; -const AUTO_MODE_COOLDOWN_MS = 3000; - -const useVoiceChat = ({ chatContainerRef }) => { - const [chatHistory, setChatHistory] = useState([]); - const [status, setStatus] = useState("Click the microphone to start recording."); - const [isBusy, setIsBusy] = useState(false); - const [isRecording, setIsRecording] = useState(false); - const [showErrorModal, setShowErrorModal] = useState(false); - const [errorMessage, setErrorMessage] = useState(""); - const [sessionId, setSessionId] = useState(null); - const [isAutoMode, setIsAutoMode] = useState(false); - const [isAutoListening, setIsAutoListening] = useState(false); - const [tokenUsage, setTokenUsage] = useState({ token_count: 0, token_limit: 0, percentage: 0 }); - const [isConfigured, setIsConfigured] = useState(true); - const [missingConfigs, setMissingConfigs] = useState([]); - const [userConfigData, setUserConfigData] = useState(null); - const [localActivePrefs, 
setLocalActivePrefs] = useState({ llm: '', tts: '', stt: '' }); - - // All refs must be declared here, inside the custom hook. - const sessionIdRef = useRef(null); - const mediaRecorderRef = useRef(null); - const audioChunksRef = useRef([]); - const audioContextRef = useRef(null); - const playbackTimeRef = useRef(0); - const isRecordingRef = useRef(false); - const isBusyRef = useRef(false); - const playingSourcesRef = useRef([]); - const vadStreamRef = useRef(null); - const scriptProcessorRef = useRef(null); - const silenceTimeoutRef = useRef(null); - const lastRequestTimeRef = useRef(0); - const streamRef = useRef(null); - - const [isStreamingPlaying, setIsStreamingPlaying] = useState(false); - - // Helper to keep ref and state in sync - const setBusy = (val) => { - setIsBusy(val); - isBusyRef.current = val; - }; - - /** - * Stops any currently playing streaming audio. - */ - const stopStreamingPlayback = useCallback(() => { - stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef); - setIsStreamingPlaying(false); - setBusy(false); - }, []); - - const fetchTokenUsage = useCallback(async () => { - if (!sessionIdRef.current) return; - try { - const usage = await getSessionTokenStatus(sessionIdRef.current); - setTokenUsage(usage); - } catch (err) { - console.warn("Failed to fetch voice token usage", err); - } - }, []); - - // --- Initial Session Creation Effect --- - useEffect(() => { - const startSession = async () => { - setBusy(true); - setStatus("Loading chat session..."); - try { - let configDataToUse = null; - let providerToUse = "gemini"; - try { - configDataToUse = await getUserConfig(); - setUserConfigData(configDataToUse); - if (configDataToUse.effective?.llm?.active_provider) { - providerToUse = configDataToUse.effective.llm.active_provider; - } - } catch (e) { - console.warn("Could not load user config for Voice", e); - } - - const currentSessionId = await getSessionId("voice_chat", providerToUse); - setSessionId(currentSessionId); - 
sessionIdRef.current = currentSessionId; - - // Try to load chat history - try { - const messagesData = await getSessionMessages(currentSessionId); - if (messagesData && messagesData.messages && messagesData.messages.length > 0) { - const formattedHistoryPromises = messagesData.messages.map(async (msg) => { - let audioBlob = null; - if (msg.has_audio) { - try { - audioBlob = await fetchMessageAudio(msg.id); - } catch (e) { - console.warn(`Failed to fetch audio for message ${msg.id}`, e); - } - } - return { - id: msg.id, - isUser: msg.sender === "user", - text: msg.content, - reasoning: msg.reasoning_content, - timestamp: msg.created_at, - - isFromHistory: true, - audioBlob: audioBlob - }; - }); - const formattedHistory = await Promise.all(formattedHistoryPromises); - setChatHistory(formattedHistory); - } - } catch (historyErr) { - console.warn("Failed to load voice chat history", historyErr); - } - - // Load initial tokens - await fetchTokenUsage(); - - // Check if configuration is fully populated - try { - const eff = configDataToUse?.effective || {}; - - let sessionLlmProvider = providerToUse; - try { - const currentSession = await getSession(currentSessionId); - if (currentSession && currentSession.provider_name) { - sessionLlmProvider = currentSession.provider_name; - } - } catch (sessionErr) { - console.warn("Could not fetch session details", sessionErr); - } - - setLocalActivePrefs({ - llm: sessionLlmProvider, - tts: eff.tts?.active_provider || '', - stt: eff.stt?.active_provider || '' - }); - - const missing = []; - - const llmProviders = eff.llm?.providers || {}; - const hasLLMKey = Object.values(llmProviders).some(p => p.api_key && p.api_key !== 'None'); - if (!hasLLMKey) missing.push("Language Model (LLM) API Key"); - - const ttsProviders = eff.tts?.providers || {}; - const hasTTSKey = Object.values(ttsProviders).some(p => p.api_key && p.api_key !== 'None'); - if (!hasTTSKey) missing.push("Text-to-Speech (TTS) API Key"); - - const sttProviders = 
eff.stt?.providers || {}; - const hasSTTKey = Object.values(sttProviders).some(p => p.api_key && p.api_key !== 'None'); - if (!hasSTTKey) missing.push("Speech-to-Text (STT) API Key"); - - if (missing.length > 0) { - setIsConfigured(false); - setMissingConfigs(missing); - setStatus(`Feature not usable. Missing: ${missing.join(', ')}`); - } else { - setIsConfigured(true); - setMissingConfigs([]); - setStatus("Click the microphone to start recording."); - } - } catch (configErr) { - console.warn("Failed to load user config", configErr); - setIsConfigured(false); - setMissingConfigs(["Failed to load complete configuration"]); - } - - console.log(`Voice Session loaded with ID: ${currentSessionId}`); - } catch (err) { - console.error("Error creating session:", err); - setStatus(`Error: Could not start session. ${err.message}`); - setErrorMessage(`Failed to create a chat session: ${err.message}`); - setShowErrorModal(true); - } finally { - setIsBusy(false); - } - }; - startSession(); - - return () => { - // Pass the refs to the utility function here - stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); - stopStreamingPlayback(); - }; - }, [stopStreamingPlayback]); - - // New useEffect hook to automatically scroll to the bottom of the chat history - // The fix: `chatContainerRef` is now included in the dependency array. - useEffect(() => { - if (chatContainerRef.current) { - chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight; - } - }, [chatHistory, chatContainerRef]); - - const addMessage = (text, isUser, id = null) => { - setChatHistory((prevHistory) => [...prevHistory, { - text, - isUser, - id, - timestamp: new Date().toISOString() - }]); - }; - - /** - * Plays a stream of audio chunks using the Web Audio API by fetching them from the API. - * This is the orchestrator that uses the stateless streamSpeech API function. - * @param {string} text - The text to be synthesized by the TTS service. 
- * @param {number} messageId - The ID of the message to associated the audio with. - */ - const playStreamingAudio = async (text, messageId = null) => { - setBusy(true); - setIsStreamingPlaying(true); - setStatus("Streaming audio..."); - - // Stop any existing playback - stopStreamingPlayback(); - setBusy(true); // stopStreamingPlayback sets it to false, we want it true during this process - - // Track chunks to store in history - const accumulatedChunks = []; - - try { - if (!audioContextRef.current) { - audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)(); - playbackTimeRef.current = audioContextRef.current.currentTime; - } - - const audioContext = audioContextRef.current; - - const onChunkReceived = (rawFloat32Data, totalChunks, currentChunkIndex) => { - // Collect for storage - accumulatedChunks.push(new Float32Array(rawFloat32Data)); - - // Update UI progress - if (totalChunks > 0) { - const progress = Math.min(Math.round((currentChunkIndex / totalChunks) * 100), 100); - setChatHistory(prev => { - const next = [...prev]; - for (let i = next.length - 1; i >= 0; i--) { - if (!next[i].isUser && !next[i].audioBlob) { - next[i].audioProgress = progress; - break; - } - } - return next; - }); - } - const float32Resampled = resampleBuffer( - rawFloat32Data, - 24000, - audioContext.sampleRate - ); - const audioBuffer = audioContext.createBuffer( - 1, - float32Resampled.length, - audioContext.sampleRate - ); - audioBuffer.copyToChannel(float32Resampled, 0); - - // Apply a very short fade-in and fade-out (2ms) to eliminate "clicks" at segment boundaries - const fadeSamps = Math.floor(audioContext.sampleRate * 0.002); - const chanData = audioBuffer.getChannelData(0); - if (chanData.length > fadeSamps * 2) { - for (let i = 0; i < fadeSamps; i++) { - chanData[i] *= (i / fadeSamps); - chanData[chanData.length - 1 - i] *= (i / fadeSamps); - } - } - - const source = audioContext.createBufferSource(); - source.buffer = audioBuffer; - 
source.connect(audioContext.destination); - - const currentTime = audioContext.currentTime; - const startTime = Math.max(playbackTimeRef.current, currentTime); - - source.start(startTime); - playbackTimeRef.current = startTime + audioBuffer.duration; - - playingSourcesRef.current.push(source); - source.onended = () => { - playingSourcesRef.current = playingSourcesRef.current.filter( - (s) => s !== source - ); - }; - }; - - const onStreamDone = async () => { - console.log("TTS Stream complete."); - setIsStreamingPlaying(false); - if (accumulatedChunks.length > 0) { - // Concatenate all chunks and save the blob - const totalLen = accumulatedChunks.reduce((acc, c) => acc + c.length, 0); - const result = new Float32Array(totalLen); - let offset = 0; - for (const c of accumulatedChunks) { - result.set(c, offset); - offset += c.length; - } - // resample to standard 44.1k for download/blob stability - const finalPcm = resampleBuffer(result, 24000, 44100); - const wavBlob = encodeWAV(finalPcm, 44100); - - // Upload to persistent storage if messageId is available - if (messageId) { - try { - console.log(`Uploading audio for message ${messageId}...`); - await uploadMessageAudio(messageId, wavBlob); - } catch (uploadErr) { - console.warn("Failed to upload persistent audio", uploadErr); - } - } - - // Post-update: find the last AI message and attach this blob - setChatHistory(prev => { - const next = [...prev]; - // Find the latest assistant message that matches this text (or just the latest) - for (let i = next.length - 1; i >= 0; i--) { - if (!next[i].isUser && !next[i].audioBlob) { - next[i].audioBlob = wavBlob; - if (messageId) next[i].id = messageId; - break; - } - } - return next; - }); - } - }; - - if (!text || !text.trim()) { - console.warn("No text to synthesize, skipping audio playback."); - return; - } - - await streamSpeech(text, onChunkReceived, onStreamDone, localActivePrefs.tts); - - } catch (err) { - console.error("Failed to stream speech:", err); - 
setIsStreamingPlaying(false); - setStatus(`Error: Failed to stream speech. ${err.message}`); - setErrorMessage(`Failed to stream speech: ${err.message}`); - setShowErrorModal(true); - } finally { - setBusy(false); - lastRequestTimeRef.current = Date.now(); - if (isAutoMode && isAutoListening) { - setStatus("Listening for voice..."); - } else if (!isAutoMode) { - setStatus("Click the microphone to start recording."); - } else { - setStatus("Click to start listening."); - } - } - }; - - /** - * Specifically for manual replay/synthesis of any message (including history) - */ - const synthesizeMessageAudio = async (index, text) => { - if (isBusy) return; - const accumulatedChunks = []; - - if (chatHistory[index]?.audioBlob) return; - - setIsBusy(true); - try { - if (!audioContextRef.current) { - audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)(); - playbackTimeRef.current = audioContextRef.current.currentTime; - } - const audioContext = audioContextRef.current; - - const onData = (rawFloat32Data, total, current) => { - accumulatedChunks.push(new Float32Array(rawFloat32Data)); - if (total > 0) { - const progress = Math.min(Math.round((current / total) * 100), 100); - setChatHistory(prev => { - const next = [...prev]; - if (next[index]) next[index].audioProgress = progress; - return next; - }); - } - - const float32Resampled = resampleBuffer(rawFloat32Data, 24000, audioContext.sampleRate); - const audioBuffer = audioContext.createBuffer(1, float32Resampled.length, audioContext.sampleRate); - audioBuffer.copyToChannel(float32Resampled, 0); - const source = audioContext.createBufferSource(); - source.buffer = audioBuffer; - source.connect(audioContext.destination); - const startTime = Math.max(playbackTimeRef.current, audioContext.currentTime); - source.start(startTime); - playbackTimeRef.current = startTime + audioBuffer.duration; - playingSourcesRef.current.push(source); - }; - - const onDone = async () => { - if (accumulatedChunks.length 
> 0) { - const totalLen = accumulatedChunks.reduce((acc, c) => acc + c.length, 0); - const result = new Float32Array(totalLen); - let offset = 0; - for (const c of accumulatedChunks) { - result.set(c, offset); - offset += c.length; - } - const finalPcm = resampleBuffer(result, 24000, 44100); - const wavBlob = encodeWAV(finalPcm, 44100); - - const messageId = chatHistory[index]?.id; - if (messageId) { - try { - await uploadMessageAudio(messageId, wavBlob); - } catch (e) { console.warn("Upload failed during manual synthesis", e); } - } - - setChatHistory(prev => { - const next = [...prev]; - if (next[index]) next[index].audioBlob = wavBlob; - return next; - }); - } - }; - - if (!text || !text.trim()) return; - - await streamSpeech(text, onData, onDone, localActivePrefs.tts); - } catch (err) { - console.error("Manual synthesis failed", err); - } finally { - setIsBusy(false); - } - }; - - const processConversation = async (audioBlob) => { - setBusy(true); - console.log("Processing conversation..."); - try { - const audioDuration = audioBlob.size / (48000 * 2 * 1) * 1000; - if (audioDuration < MINIMUM_AUDIO_DURATION_MS) { - const msg = "Your recording was too short. Please speak for at least half a second."; - console.log(`Audio too short (${audioDuration.toFixed(2)}ms), skipping.`); - setStatus(msg); - if (!isAutoMode) { - setErrorMessage(msg); - setShowErrorModal(true); - } - lastRequestTimeRef.current = Date.now(); - return; - } - if (audioBlob.size === 0) { - const msg = "Recording stopped, but no audio was captured. 
Please try again."; - console.warn("Audio blob is empty, skipping STT API call."); - setStatus(msg); - if (!isAutoMode) { - setErrorMessage(msg); - setShowErrorModal(true); - } - lastRequestTimeRef.current = Date.now(); - return; - } - - setStatus("Transcribing audio..."); - const userText = await transcribeAudio(audioBlob, localActivePrefs.stt); - addMessage(userText, true); - - setStatus("AI is thinking..."); - const aiResponse = await chatWithAI(sessionId, userText, localActivePrefs.llm || "gemini"); - addMessage(aiResponse.answer, false, aiResponse.message_id); - - fetchTokenUsage(); - await playStreamingAudio(aiResponse.answer, aiResponse.message_id); - } catch (error) { - console.error("Conversation processing failed:", error); - setStatus(`Error: ${error.message}`); - setErrorMessage(`An error occurred: ${error.message}`); - setShowErrorModal(true); - } finally { - setBusy(false); - lastRequestTimeRef.current = Date.now(); - // This is the main correction: only stop streams if not in auto-listening mode - if (!isAutoMode) { - setStatus("Click the microphone to start recording."); - stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); - } else if (isAutoMode && isAutoListening) { - setStatus("Listening for voice..."); - } else { - setStatus("Click to start listening."); - } - } - }; - - const startManualRecording = async () => { - if (isRecording) return; - - try { - if (!sessionId) { - setErrorMessage("Please wait for the chat session to be initialized."); - setShowErrorModal(true); - return; - } - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - streamRef.current = stream; - mediaRecorderRef.current = new MediaRecorder(stream); - mediaRecorderRef.current.start(); - audioChunksRef.current = []; - - mediaRecorderRef.current.ondataavailable = (event) => { - audioChunksRef.current.push(event.data); - }; - - mediaRecorderRef.current.onstop = async () => { - if (streamRef.current) { - 
streamRef.current.getTracks().forEach(track => track.stop()); - streamRef.current = null; - } - mediaRecorderRef.current = null; - const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" }); - await processConversation(audioBlob); - }; - setIsRecording(true); - isRecordingRef.current = true; - setStatus("Recording... Click to stop."); - } catch (err) { - console.error("Error accessing microphone:", err); - setStatus("Error: Cannot access microphone."); - setErrorMessage("Microphone access has been denied. Please enable it."); - setShowErrorModal(true); - } - }; - - const stopManualRecording = () => { - if (mediaRecorderRef.current?.state === "recording") { - setBusy(true); - setIsRecording(false); - mediaRecorderRef.current.stop(); - } - }; - - const startAutoListening = async () => { - try { - if (!sessionId) { - setErrorMessage("Please wait for the chat session to be initialized."); - setShowErrorModal(true); - return; - } - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - vadStreamRef.current = stream; - const audioContext = new (window.AudioContext || window.webkitAudioContext)(); - const source = audioContext.createMediaStreamSource(stream); - const bufferSize = 4096; - const scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1); - scriptProcessorRef.current = scriptProcessor; - source.connect(scriptProcessor); - scriptProcessor.connect(audioContext.destination); - - scriptProcessor.onaudioprocess = (event) => { - const inputBuffer = event.inputBuffer.getChannelData(0); - let sum = 0.0; - for (let i = 0; i < inputBuffer.length; i++) { - sum += inputBuffer[i] * inputBuffer[i]; - } - const volume = Math.sqrt(sum / inputBuffer.length); - const isVoiceDetected = volume > VAD_THRESHOLD; - const timeSinceLastRequest = Date.now() - lastRequestTimeRef.current; - const isCooldownPassed = timeSinceLastRequest > AUTO_MODE_COOLDOWN_MS; - - if (isVoiceDetected && !isBusyRef.current) { - if 
(silenceTimeoutRef.current) { - clearTimeout(silenceTimeoutRef.current); - silenceTimeoutRef.current = null; - } - if (!isRecordingRef.current && isCooldownPassed) { - startAutoRecording(stream); - } - } else if (isRecordingRef.current) { - if (!silenceTimeoutRef.current) { - silenceTimeoutRef.current = setTimeout(() => { - stopAutoRecording(); - }, VAD_SILENCE_DURATION); - } - } - }; - setIsAutoListening(true); - setStatus("Listening for voice..."); - } catch (err) { - console.error("Error accessing microphone for VAD:", err); - setStatus("Error: Cannot access microphone."); - setErrorMessage("Microphone access has been denied. Please enable it."); - setShowErrorModal(true); - } - }; - - const stopAutoListening = () => { - setIsAutoListening(false); - // Pass the refs here to the utility function - stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef); - setStatus("Click to start listening."); - }; - - const startAutoRecording = (stream) => { - if (mediaRecorderRef.current?.state === "recording") return; - mediaRecorderRef.current = new MediaRecorder(stream); - audioChunksRef.current = []; - mediaRecorderRef.current.ondataavailable = (event) => { - audioChunksRef.current.push(event.data); - }; - mediaRecorderRef.current.onstop = async () => { - isRecordingRef.current = false; - setIsRecording(false); - if (audioChunksRef.current.length > 0) { - setIsBusy(true); - setStatus("Transcribing audio..."); - const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" }); - await processConversation(audioBlob); - } else { - setIsBusy(false); - setStatus("Listening for voice..."); - } - }; - mediaRecorderRef.current.start(); - isRecordingRef.current = true; - setIsRecording(true); - setStatus("Recording..."); - }; - - const stopAutoRecording = () => { - if (mediaRecorderRef.current?.state === "recording") { - mediaRecorderRef.current.stop(); - } - }; - - const handleMicClick = () => { - // Correctly pass the refs to the utility 
function - stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef); - - if (!isConfigured) { - setErrorMessage("Voice Chat requires valid configurations. Please visit Settings to set up your LLM, TTS, and STT providers."); - setShowErrorModal(true); - return; - } - - if (isBusy) return; - - if (isAutoMode) { - if (isAutoListening) { - stopAutoListening(); - } else { - startAutoListening(); - } - } else { - if (isRecording) { - stopManualRecording(); - } else { - startManualRecording(); - } - } - }; - - const handleNewSession = async () => { - setChatHistory([]); - localStorage.removeItem("sessionId_voice_chat"); - - setIsBusy(true); - setStatus("Starting new session..."); - const prefProvider = userConfigData?.effective?.llm?.active_provider || "gemini"; - try { - const newSessionId = await getSessionId("voice_chat", prefProvider, { - stt_provider_name: localActivePrefs.stt || userConfigData?.effective?.stt?.active_provider, - tts_provider_name: localActivePrefs.tts || userConfigData?.effective?.tts?.active_provider - }); - setLocalActivePrefs(prev => ({ ...prev, llm: prefProvider })); - setSessionId(newSessionId); - sessionIdRef.current = newSessionId; - fetchTokenUsage(); - setStatus("Click the microphone to start recording."); - } catch (err) { - console.error("Failed to start new voice session", err); - setStatus("Error creating new session."); - } finally { - setIsBusy(false); - } - }; - - const handleSwitchSession = useCallback(async (targetSessionId) => { - localStorage.setItem("sessionId_voice_chat", targetSessionId); - setSessionId(targetSessionId); - sessionIdRef.current = targetSessionId; - - setChatHistory([]); - setStatus(`Loading session #${targetSessionId}...`); - - try { - // Because we are switching sessions, we MUST sync the settings layout - // back to the new session's provider_name - try { - const switchedSession = await getSession(targetSessionId); - if (switchedSession && switchedSession.provider_name) { - 
setLocalActivePrefs(prev => ({ - ...prev, - llm: switchedSession.provider_name - })); - } - } catch (e) { console.warn("Could not fetch switched session provider", e); } - - const messagesData = await getSessionMessages(targetSessionId); - if (messagesData && messagesData.messages) { - const mappedHistoryPromises = messagesData.messages.map(async (msg) => { - let audioBlob = null; - if (msg.has_audio) { - try { - audioBlob = await fetchMessageAudio(msg.id); - } catch (e) { - console.warn(`Failed to fetch audio for message ${msg.id} during switch`, e); - } - } - return { - id: msg.id, - text: msg.content, - isUser: msg.sender === 'user', - timestamp: msg.created_at, - isFromHistory: true, - audioBlob: audioBlob - }; - }); - const mappedHistory = await Promise.all(mappedHistoryPromises); - setChatHistory(mappedHistory); - } - fetchTokenUsage(); - setStatus(`Click the microphone to start recording.`); - } catch (error) { - console.error("Failed to switch session:", error); - setStatus("Failed to load session history."); - } - }, [fetchTokenUsage]); - - return { - chatHistory, - status, - isBusy, - isRecording, - isAutoMode, - isAutoListening, - sessionId, - showErrorModal, - errorMessage, - tokenUsage, - isConfigured, - missingConfigs, - userConfigData, - localActivePrefs, - setLocalActivePrefs, - setIsAutoMode, - handleMicClick, - handleNewSession, - handleSwitchSession, - setShowErrorModal, - setErrorMessage, - synthesizeMessageAudio, - isStreamingPlaying, - stopStreamingPlayback - }; -}; - -export default useVoiceChat; diff --git a/frontend/src/pages/VoiceChatPage.js b/frontend/src/pages/VoiceChatPage.js deleted file mode 100644 index 8a98adf..0000000 --- a/frontend/src/pages/VoiceChatPage.js +++ /dev/null @@ -1,278 +0,0 @@ -import React, { useState, useRef, useEffect } from "react"; -import useVoiceChat from "../hooks/useVoiceChat"; -import { ChatWindow } from "../features/chat"; -import Controls from "../components/VoiceControls"; -import SessionSidebar from 
"../components/SessionSidebar"; -import { updateSession } from "../services/apiService"; - -const VoiceChatPage = () => { - const chatContainerRef = useRef(null); - const [showConfigModal, setShowConfigModal] = useState(false); - const [savingConfig, setSavingConfig] = useState(false); - const [sidebarRefreshTick, setSidebarRefreshTick] = useState(0); - - const { - chatHistory, - status, - isRecording, - isBusy, - isAutoMode, - isAutoListening, - showErrorModal, - errorMessage, - tokenUsage, - setIsAutoMode, - handleMicClick, - handleNewSession, - setShowErrorModal, - handleSwitchSession, - sessionId, - isConfigured, - missingConfigs, - userConfigData, - localActivePrefs, - setLocalActivePrefs, - setErrorMessage, - synthesizeMessageAudio, - isStreamingPlaying, - stopStreamingPlayback - } = useVoiceChat({ chatContainerRef }); - - useEffect(() => { - if (chatContainerRef.current) { - chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight; - } - }, [chatHistory]); - - const toggleAutoMode = () => { - setIsAutoMode(!isAutoMode); - }; - - const handleSaveQuickConfig = async () => { - try { - setSavingConfig(true); - if (sessionId && localActivePrefs.llm) { - await updateSession(sessionId, { provider_name: localActivePrefs.llm }); - setSidebarRefreshTick(t => t + 1); - } - setShowConfigModal(false); - } catch (e) { - console.error(e); - setErrorMessage("Failed to apply the session-local providers."); - setShowErrorModal(true); - } finally { - setSavingConfig(false); - } - }; - - return ( -
- - - {/* Main content area */} -
-
- {/* Chat Area Box */} -
-
- - {/* Box Header */} -

-
-
- - - -
-
- Voice Chat Assistant - Real-time Conversational AI -
- {!isConfigured && ( -
- - - -
-

Missing Key

-
    - {missingConfigs?.map((m, i) =>
  • {m}
  • )} -
-
-
- )} - -
-
-
-
- Token Usage -
-
-
-
80 ? 'bg-red-500' : 'bg-indigo-500'}`} - style={{ width: `${Math.min(tokenUsage?.percentage || 0, 100)}%` }} - >
-
- 80 ? 'text-red-500' : 'text-gray-400'}`}> - {tokenUsage?.percentage || 0}% - -
-
- -
-

- - {/* Chat Scroll Area */} -
- -
- - {/* Controls Wrapper */} -
- -
-
-
-
-
- - {/* Error Modal */} - {showErrorModal && ( -
-
-
- -
-

Attention Required

-

{errorMessage}

- -
-
- )} - - {/* Quick Settings Modal */} - {showConfigModal && ( -
-
-
-

- - Session Engine -

- -
-
-
- - -
- -
- - -
- -
- - -
-
-
- - -
-
-
- )} -
- ); -}; - -export default VoiceChatPage; \ No newline at end of file