diff --git a/frontend/src/App.js b/frontend/src/App.js
index 685c734..d3243f7 100644
--- a/frontend/src/App.js
+++ b/frontend/src/App.js
@@ -2,7 +2,7 @@
import React, { useState, useEffect } from "react";
import Navbar from "./components/Navbar";
import HomePage from "./pages/HomePage";
-import VoiceChatPage from "./pages/VoiceChatPage";
+import { VoiceChatPage } from "./features/voice";
import SwarmControlPage from "./pages/SwarmControlPage";
import LoginPage from "./pages/LoginPage";
import SettingsPage from "./pages/SettingsPage";
diff --git a/frontend/src/components/VoiceControls.js b/frontend/src/components/VoiceControls.js
deleted file mode 100644
index 984b079..0000000
--- a/frontend/src/components/VoiceControls.js
+++ /dev/null
@@ -1,67 +0,0 @@
-// src/components/VoiceControls.js
-import React from "react";
-import { FaMicrophone, FaRegStopCircle } from "react-icons/fa";
-
-const Controls = ({
- status,
- isBusy,
- isRecording,
- isAutoMode,
- isAutoListening,
- onMicClick,
- onToggleAutoMode,
-}) => {
- const micButtonColorClass = isRecording
- ? "bg-red-500 hover:bg-red-600 active:bg-red-700 shadow-red-500/20"
- : "bg-indigo-600 hover:bg-indigo-700 active:bg-indigo-800 shadow-indigo-500/20";
-
- const micButtonState =
- isAutoMode && isAutoListening ? isAutoListening : isRecording;
-
- return (
-
- {/* Status indicator */}
-
-
-
- {status || (isBusy ? "Thinking..." : "Ready")}
-
-
-
-
- {/* Mic Toggle Button */}
-
-
- {/* Auto Mode Toggle */}
-
-
-
- );
-};
-
-export default Controls;
\ No newline at end of file
diff --git a/frontend/src/features/voice/components/VoiceControls.js b/frontend/src/features/voice/components/VoiceControls.js
new file mode 100644
index 0000000..984b079
--- /dev/null
+++ b/frontend/src/features/voice/components/VoiceControls.js
@@ -0,0 +1,67 @@
+// src/features/voice/components/VoiceControls.js
+import React from "react";
+import { FaMicrophone, FaRegStopCircle } from "react-icons/fa";
+
+const Controls = ({
+ status,
+ isBusy,
+ isRecording,
+ isAutoMode,
+ isAutoListening,
+ onMicClick,
+ onToggleAutoMode,
+}) => {
+ const micButtonColorClass = isRecording
+ ? "bg-red-500 hover:bg-red-600 active:bg-red-700 shadow-red-500/20"
+ : "bg-indigo-600 hover:bg-indigo-700 active:bg-indigo-800 shadow-indigo-500/20";
+
+ const micButtonState =
+ isAutoMode && isAutoListening ? isAutoListening : isRecording;
+
+ return (
+
+ {/* Status indicator */}
+
+
+
+ {status || (isBusy ? "Thinking..." : "Ready")}
+
+
+
+
+ {/* Mic Toggle Button */}
+
+
+ {/* Auto Mode Toggle */}
+
+
+
+ );
+};
+
+export default Controls;
\ No newline at end of file
diff --git a/frontend/src/features/voice/hooks/useVoiceChat.js b/frontend/src/features/voice/hooks/useVoiceChat.js
new file mode 100644
index 0000000..7ea7154
--- /dev/null
+++ b/frontend/src/features/voice/hooks/useVoiceChat.js
@@ -0,0 +1,791 @@
+// src/features/voice/hooks/useVoiceChat.js
+
+// This file is a custom React hook that contains all the stateful logic
+// and side effects for the voice chat application.
+
+import { useState, useRef, useEffect, useCallback } from "react";
+import {
+ createSession,
+ transcribeAudio,
+ chatWithAI,
+ streamSpeech,
+ getSessionMessages,
+ getSessionTokenStatus,
+ getUserConfig,
+ getSession,
+ uploadMessageAudio,
+ fetchMessageAudio
+} from "../../../services/apiService";
+import { getSessionId } from "../../../services/websocket";
+import {
+ stopAllPlayingAudio,
+ stopAllMediaStreams,
+ resampleBuffer,
+ encodeWAV,
+} from "../../../services/audioUtils";
+
+// Constants for Voice Activity Detection and timing
+const VAD_THRESHOLD = 0.01;
+const VAD_SILENCE_DURATION = 2500;
+const MINIMUM_AUDIO_DURATION_MS = 500;
+const AUTO_MODE_COOLDOWN_MS = 3000;
+
+const useVoiceChat = ({ chatContainerRef }) => {
+ const [chatHistory, setChatHistory] = useState([]);
+ const [status, setStatus] = useState("Click the microphone to start recording.");
+ const [isBusy, setIsBusy] = useState(false);
+ const [isRecording, setIsRecording] = useState(false);
+ const [showErrorModal, setShowErrorModal] = useState(false);
+ const [errorMessage, setErrorMessage] = useState("");
+ const [sessionId, setSessionId] = useState(null);
+ const [isAutoMode, setIsAutoMode] = useState(false);
+ const [isAutoListening, setIsAutoListening] = useState(false);
+ const [tokenUsage, setTokenUsage] = useState({ token_count: 0, token_limit: 0, percentage: 0 });
+ const [isConfigured, setIsConfigured] = useState(true);
+ const [missingConfigs, setMissingConfigs] = useState([]);
+ const [userConfigData, setUserConfigData] = useState(null);
+ const [localActivePrefs, setLocalActivePrefs] = useState({ llm: '', tts: '', stt: '' });
+
+ // All refs must be declared here, inside the custom hook.
+ const sessionIdRef = useRef(null);
+ const mediaRecorderRef = useRef(null);
+ const audioChunksRef = useRef([]);
+ const audioContextRef = useRef(null);
+ const playbackTimeRef = useRef(0);
+ const isRecordingRef = useRef(false);
+ const isBusyRef = useRef(false);
+ const playingSourcesRef = useRef([]);
+ const vadStreamRef = useRef(null);
+ const scriptProcessorRef = useRef(null);
+ const silenceTimeoutRef = useRef(null);
+ const lastRequestTimeRef = useRef(0);
+ const streamRef = useRef(null);
+
+ const [isStreamingPlaying, setIsStreamingPlaying] = useState(false);
+
+ // Helper to keep ref and state in sync
+ const setBusy = (val) => {
+ setIsBusy(val);
+ isBusyRef.current = val;
+ };
+
+ /**
+ * Stops any currently playing streaming audio.
+ */
+ const stopStreamingPlayback = useCallback(() => {
+ stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef);
+ setIsStreamingPlaying(false);
+ setBusy(false);
+ }, []);
+
+ const fetchTokenUsage = useCallback(async () => {
+ if (!sessionIdRef.current) return;
+ try {
+ const usage = await getSessionTokenStatus(sessionIdRef.current);
+ setTokenUsage(usage);
+ } catch (err) {
+ console.warn("Failed to fetch voice token usage", err);
+ }
+ }, []);
+
+ // --- Initial Session Creation Effect ---
+ useEffect(() => {
+ const startSession = async () => {
+ setBusy(true);
+ setStatus("Loading chat session...");
+ try {
+ let configDataToUse = null;
+ let providerToUse = "gemini";
+ try {
+ configDataToUse = await getUserConfig();
+ setUserConfigData(configDataToUse);
+ if (configDataToUse.effective?.llm?.active_provider) {
+ providerToUse = configDataToUse.effective.llm.active_provider;
+ }
+ } catch (e) {
+ console.warn("Could not load user config for Voice", e);
+ }
+
+ const currentSessionId = await getSessionId("voice_chat", providerToUse);
+ setSessionId(currentSessionId);
+ sessionIdRef.current = currentSessionId;
+
+ // Try to load chat history
+ try {
+ const messagesData = await getSessionMessages(currentSessionId);
+ if (messagesData && messagesData.messages && messagesData.messages.length > 0) {
+ const formattedHistoryPromises = messagesData.messages.map(async (msg) => {
+ let audioBlob = null;
+ if (msg.has_audio) {
+ try {
+ audioBlob = await fetchMessageAudio(msg.id);
+ } catch (e) {
+ console.warn(`Failed to fetch audio for message ${msg.id}`, e);
+ }
+ }
+ return {
+ id: msg.id,
+ isUser: msg.sender === "user",
+ text: msg.content,
+ reasoning: msg.reasoning_content,
+ timestamp: msg.created_at,
+
+ isFromHistory: true,
+ audioBlob: audioBlob
+ };
+ });
+ const formattedHistory = await Promise.all(formattedHistoryPromises);
+ setChatHistory(formattedHistory);
+ }
+ } catch (historyErr) {
+ console.warn("Failed to load voice chat history", historyErr);
+ }
+
+ // Load initial tokens
+ await fetchTokenUsage();
+
+ // Check if configuration is fully populated
+ try {
+ const eff = configDataToUse?.effective || {};
+
+ let sessionLlmProvider = providerToUse;
+ try {
+ const currentSession = await getSession(currentSessionId);
+ if (currentSession && currentSession.provider_name) {
+ sessionLlmProvider = currentSession.provider_name;
+ }
+ } catch (sessionErr) {
+ console.warn("Could not fetch session details", sessionErr);
+ }
+
+ setLocalActivePrefs({
+ llm: sessionLlmProvider,
+ tts: eff.tts?.active_provider || '',
+ stt: eff.stt?.active_provider || ''
+ });
+
+ const missing = [];
+
+ const llmProviders = eff.llm?.providers || {};
+ const hasLLMKey = Object.values(llmProviders).some(p => p.api_key && p.api_key !== 'None');
+ if (!hasLLMKey) missing.push("Language Model (LLM) API Key");
+
+ const ttsProviders = eff.tts?.providers || {};
+ const hasTTSKey = Object.values(ttsProviders).some(p => p.api_key && p.api_key !== 'None');
+ if (!hasTTSKey) missing.push("Text-to-Speech (TTS) API Key");
+
+ const sttProviders = eff.stt?.providers || {};
+ const hasSTTKey = Object.values(sttProviders).some(p => p.api_key && p.api_key !== 'None');
+ if (!hasSTTKey) missing.push("Speech-to-Text (STT) API Key");
+
+ if (missing.length > 0) {
+ setIsConfigured(false);
+ setMissingConfigs(missing);
+ setStatus(`Feature not usable. Missing: ${missing.join(', ')}`);
+ } else {
+ setIsConfigured(true);
+ setMissingConfigs([]);
+ setStatus("Click the microphone to start recording.");
+ }
+ } catch (configErr) {
+ console.warn("Failed to load user config", configErr);
+ setIsConfigured(false);
+ setMissingConfigs(["Failed to load complete configuration"]);
+ }
+
+ console.log(`Voice Session loaded with ID: ${currentSessionId}`);
+ } catch (err) {
+ console.error("Error creating session:", err);
+ setStatus(`Error: Could not start session. ${err.message}`);
+ setErrorMessage(`Failed to create a chat session: ${err.message}`);
+ setShowErrorModal(true);
+ } finally {
+ setIsBusy(false);
+ }
+ };
+ startSession();
+
+ return () => {
+ // Pass the refs to the utility function here
+ stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
+ stopStreamingPlayback();
+ };
+ }, [stopStreamingPlayback]);
+
+ // New useEffect hook to automatically scroll to the bottom of the chat history
+ // The fix: `chatContainerRef` is now included in the dependency array.
+ useEffect(() => {
+ if (chatContainerRef.current) {
+ chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight;
+ }
+ }, [chatHistory, chatContainerRef]);
+
+ const addMessage = (text, isUser, id = null) => {
+ setChatHistory((prevHistory) => [...prevHistory, {
+ text,
+ isUser,
+ id,
+ timestamp: new Date().toISOString()
+ }]);
+ };
+
+ /**
+ * Plays a stream of audio chunks using the Web Audio API by fetching them from the API.
+ * This is the orchestrator that uses the stateless streamSpeech API function.
+ * @param {string} text - The text to be synthesized by the TTS service.
+ * @param {number} messageId - The ID of the message to associate the audio with.
+ */
+ const playStreamingAudio = async (text, messageId = null) => {
+ setBusy(true);
+ setIsStreamingPlaying(true);
+ setStatus("Streaming audio...");
+
+ // Stop any existing playback
+ stopStreamingPlayback();
+ setBusy(true); // stopStreamingPlayback sets it to false, we want it true during this process
+
+ // Track chunks to store in history
+ const accumulatedChunks = [];
+
+ try {
+ if (!audioContextRef.current) {
+ audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)();
+ playbackTimeRef.current = audioContextRef.current.currentTime;
+ }
+
+ const audioContext = audioContextRef.current;
+
+ const onChunkReceived = (rawFloat32Data, totalChunks, currentChunkIndex) => {
+ // Collect for storage
+ accumulatedChunks.push(new Float32Array(rawFloat32Data));
+
+ // Update UI progress
+ if (totalChunks > 0) {
+ const progress = Math.min(Math.round((currentChunkIndex / totalChunks) * 100), 100);
+ setChatHistory(prev => {
+ const next = [...prev];
+ for (let i = next.length - 1; i >= 0; i--) {
+ if (!next[i].isUser && !next[i].audioBlob) {
+ next[i].audioProgress = progress;
+ break;
+ }
+ }
+ return next;
+ });
+ }
+ const float32Resampled = resampleBuffer(
+ rawFloat32Data,
+ 24000,
+ audioContext.sampleRate
+ );
+ const audioBuffer = audioContext.createBuffer(
+ 1,
+ float32Resampled.length,
+ audioContext.sampleRate
+ );
+ audioBuffer.copyToChannel(float32Resampled, 0);
+
+ // Apply a very short fade-in and fade-out (2ms) to eliminate "clicks" at segment boundaries
+ const fadeSamps = Math.floor(audioContext.sampleRate * 0.002);
+ const chanData = audioBuffer.getChannelData(0);
+ if (chanData.length > fadeSamps * 2) {
+ for (let i = 0; i < fadeSamps; i++) {
+ chanData[i] *= (i / fadeSamps);
+ chanData[chanData.length - 1 - i] *= (i / fadeSamps);
+ }
+ }
+
+ const source = audioContext.createBufferSource();
+ source.buffer = audioBuffer;
+ source.connect(audioContext.destination);
+
+ const currentTime = audioContext.currentTime;
+ const startTime = Math.max(playbackTimeRef.current, currentTime);
+
+ source.start(startTime);
+ playbackTimeRef.current = startTime + audioBuffer.duration;
+
+ playingSourcesRef.current.push(source);
+ source.onended = () => {
+ playingSourcesRef.current = playingSourcesRef.current.filter(
+ (s) => s !== source
+ );
+ };
+ };
+
+ const onStreamDone = async () => {
+ console.log("TTS Stream complete.");
+ setIsStreamingPlaying(false);
+ if (accumulatedChunks.length > 0) {
+ // Concatenate all chunks and save the blob
+ const totalLen = accumulatedChunks.reduce((acc, c) => acc + c.length, 0);
+ const result = new Float32Array(totalLen);
+ let offset = 0;
+ for (const c of accumulatedChunks) {
+ result.set(c, offset);
+ offset += c.length;
+ }
+ // resample to standard 44.1k for download/blob stability
+ const finalPcm = resampleBuffer(result, 24000, 44100);
+ const wavBlob = encodeWAV(finalPcm, 44100);
+
+ // Upload to persistent storage if messageId is available
+ if (messageId) {
+ try {
+ console.log(`Uploading audio for message ${messageId}...`);
+ await uploadMessageAudio(messageId, wavBlob);
+ } catch (uploadErr) {
+ console.warn("Failed to upload persistent audio", uploadErr);
+ }
+ }
+
+ // Post-update: find the last AI message and attach this blob
+ setChatHistory(prev => {
+ const next = [...prev];
+ // Find the latest assistant message that matches this text (or just the latest)
+ for (let i = next.length - 1; i >= 0; i--) {
+ if (!next[i].isUser && !next[i].audioBlob) {
+ next[i].audioBlob = wavBlob;
+ if (messageId) next[i].id = messageId;
+ break;
+ }
+ }
+ return next;
+ });
+ }
+ };
+
+ if (!text || !text.trim()) {
+ console.warn("No text to synthesize, skipping audio playback.");
+ return;
+ }
+
+ await streamSpeech(text, onChunkReceived, onStreamDone, localActivePrefs.tts);
+
+ } catch (err) {
+ console.error("Failed to stream speech:", err);
+ setIsStreamingPlaying(false);
+ setStatus(`Error: Failed to stream speech. ${err.message}`);
+ setErrorMessage(`Failed to stream speech: ${err.message}`);
+ setShowErrorModal(true);
+ } finally {
+ setBusy(false);
+ lastRequestTimeRef.current = Date.now();
+ if (isAutoMode && isAutoListening) {
+ setStatus("Listening for voice...");
+ } else if (!isAutoMode) {
+ setStatus("Click the microphone to start recording.");
+ } else {
+ setStatus("Click to start listening.");
+ }
+ }
+ };
+
+ /**
+ * Specifically for manual replay/synthesis of any message (including history)
+ */
+ const synthesizeMessageAudio = async (index, text) => {
+ if (isBusy) return;
+ const accumulatedChunks = [];
+
+ if (chatHistory[index]?.audioBlob) return;
+
+ setIsBusy(true);
+ try {
+ if (!audioContextRef.current) {
+ audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)();
+ playbackTimeRef.current = audioContextRef.current.currentTime;
+ }
+ const audioContext = audioContextRef.current;
+
+ const onData = (rawFloat32Data, total, current) => {
+ accumulatedChunks.push(new Float32Array(rawFloat32Data));
+ if (total > 0) {
+ const progress = Math.min(Math.round((current / total) * 100), 100);
+ setChatHistory(prev => {
+ const next = [...prev];
+ if (next[index]) next[index].audioProgress = progress;
+ return next;
+ });
+ }
+
+ const float32Resampled = resampleBuffer(rawFloat32Data, 24000, audioContext.sampleRate);
+ const audioBuffer = audioContext.createBuffer(1, float32Resampled.length, audioContext.sampleRate);
+ audioBuffer.copyToChannel(float32Resampled, 0);
+ const source = audioContext.createBufferSource();
+ source.buffer = audioBuffer;
+ source.connect(audioContext.destination);
+ const startTime = Math.max(playbackTimeRef.current, audioContext.currentTime);
+ source.start(startTime);
+ playbackTimeRef.current = startTime + audioBuffer.duration;
+ playingSourcesRef.current.push(source);
+ };
+
+ const onDone = async () => {
+ if (accumulatedChunks.length > 0) {
+ const totalLen = accumulatedChunks.reduce((acc, c) => acc + c.length, 0);
+ const result = new Float32Array(totalLen);
+ let offset = 0;
+ for (const c of accumulatedChunks) {
+ result.set(c, offset);
+ offset += c.length;
+ }
+ const finalPcm = resampleBuffer(result, 24000, 44100);
+ const wavBlob = encodeWAV(finalPcm, 44100);
+
+ const messageId = chatHistory[index]?.id;
+ if (messageId) {
+ try {
+ await uploadMessageAudio(messageId, wavBlob);
+ } catch (e) { console.warn("Upload failed during manual synthesis", e); }
+ }
+
+ setChatHistory(prev => {
+ const next = [...prev];
+ if (next[index]) next[index].audioBlob = wavBlob;
+ return next;
+ });
+ }
+ };
+
+ if (!text || !text.trim()) return;
+
+ await streamSpeech(text, onData, onDone, localActivePrefs.tts);
+ } catch (err) {
+ console.error("Manual synthesis failed", err);
+ } finally {
+ setIsBusy(false);
+ }
+ };
+
+ const processConversation = async (audioBlob) => {
+ setBusy(true);
+ console.log("Processing conversation...");
+ try {
+ const audioDuration = audioBlob.size / (48000 * 2 * 1) * 1000;
+ if (audioDuration < MINIMUM_AUDIO_DURATION_MS) {
+ const msg = "Your recording was too short. Please speak for at least half a second.";
+ console.log(`Audio too short (${audioDuration.toFixed(2)}ms), skipping.`);
+ setStatus(msg);
+ if (!isAutoMode) {
+ setErrorMessage(msg);
+ setShowErrorModal(true);
+ }
+ lastRequestTimeRef.current = Date.now();
+ return;
+ }
+ if (audioBlob.size === 0) {
+ const msg = "Recording stopped, but no audio was captured. Please try again.";
+ console.warn("Audio blob is empty, skipping STT API call.");
+ setStatus(msg);
+ if (!isAutoMode) {
+ setErrorMessage(msg);
+ setShowErrorModal(true);
+ }
+ lastRequestTimeRef.current = Date.now();
+ return;
+ }
+
+ setStatus("Transcribing audio...");
+ const userText = await transcribeAudio(audioBlob, localActivePrefs.stt);
+ addMessage(userText, true);
+
+ setStatus("AI is thinking...");
+ const aiResponse = await chatWithAI(sessionId, userText, localActivePrefs.llm || "gemini");
+ addMessage(aiResponse.answer, false, aiResponse.message_id);
+
+ fetchTokenUsage();
+ await playStreamingAudio(aiResponse.answer, aiResponse.message_id);
+ } catch (error) {
+ console.error("Conversation processing failed:", error);
+ setStatus(`Error: ${error.message}`);
+ setErrorMessage(`An error occurred: ${error.message}`);
+ setShowErrorModal(true);
+ } finally {
+ setBusy(false);
+ lastRequestTimeRef.current = Date.now();
+ // This is the main correction: only stop streams if not in auto-listening mode
+ if (!isAutoMode) {
+ setStatus("Click the microphone to start recording.");
+ stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
+ } else if (isAutoMode && isAutoListening) {
+ setStatus("Listening for voice...");
+ } else {
+ setStatus("Click to start listening.");
+ }
+ }
+ };
+
+ const startManualRecording = async () => {
+ if (isRecording) return;
+
+ try {
+ if (!sessionId) {
+ setErrorMessage("Please wait for the chat session to be initialized.");
+ setShowErrorModal(true);
+ return;
+ }
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+ streamRef.current = stream;
+ mediaRecorderRef.current = new MediaRecorder(stream);
+ mediaRecorderRef.current.start();
+ audioChunksRef.current = [];
+
+ mediaRecorderRef.current.ondataavailable = (event) => {
+ audioChunksRef.current.push(event.data);
+ };
+
+ mediaRecorderRef.current.onstop = async () => {
+ if (streamRef.current) {
+ streamRef.current.getTracks().forEach(track => track.stop());
+ streamRef.current = null;
+ }
+ mediaRecorderRef.current = null;
+ const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" });
+ await processConversation(audioBlob);
+ };
+ setIsRecording(true);
+ isRecordingRef.current = true;
+ setStatus("Recording... Click to stop.");
+ } catch (err) {
+ console.error("Error accessing microphone:", err);
+ setStatus("Error: Cannot access microphone.");
+ setErrorMessage("Microphone access has been denied. Please enable it.");
+ setShowErrorModal(true);
+ }
+ };
+
+ const stopManualRecording = () => {
+ if (mediaRecorderRef.current?.state === "recording") {
+ setBusy(true);
+ setIsRecording(false);
+ mediaRecorderRef.current.stop();
+ }
+ };
+
+ const startAutoListening = async () => {
+ try {
+ if (!sessionId) {
+ setErrorMessage("Please wait for the chat session to be initialized.");
+ setShowErrorModal(true);
+ return;
+ }
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+ vadStreamRef.current = stream;
+ const audioContext = new (window.AudioContext || window.webkitAudioContext)();
+ const source = audioContext.createMediaStreamSource(stream);
+ const bufferSize = 4096;
+ const scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
+ scriptProcessorRef.current = scriptProcessor;
+ source.connect(scriptProcessor);
+ scriptProcessor.connect(audioContext.destination);
+
+ scriptProcessor.onaudioprocess = (event) => {
+ const inputBuffer = event.inputBuffer.getChannelData(0);
+ let sum = 0.0;
+ for (let i = 0; i < inputBuffer.length; i++) {
+ sum += inputBuffer[i] * inputBuffer[i];
+ }
+ const volume = Math.sqrt(sum / inputBuffer.length);
+ const isVoiceDetected = volume > VAD_THRESHOLD;
+ const timeSinceLastRequest = Date.now() - lastRequestTimeRef.current;
+ const isCooldownPassed = timeSinceLastRequest > AUTO_MODE_COOLDOWN_MS;
+
+ if (isVoiceDetected && !isBusyRef.current) {
+ if (silenceTimeoutRef.current) {
+ clearTimeout(silenceTimeoutRef.current);
+ silenceTimeoutRef.current = null;
+ }
+ if (!isRecordingRef.current && isCooldownPassed) {
+ startAutoRecording(stream);
+ }
+ } else if (isRecordingRef.current) {
+ if (!silenceTimeoutRef.current) {
+ silenceTimeoutRef.current = setTimeout(() => {
+ stopAutoRecording();
+ }, VAD_SILENCE_DURATION);
+ }
+ }
+ };
+ setIsAutoListening(true);
+ setStatus("Listening for voice...");
+ } catch (err) {
+ console.error("Error accessing microphone for VAD:", err);
+ setStatus("Error: Cannot access microphone.");
+ setErrorMessage("Microphone access has been denied. Please enable it.");
+ setShowErrorModal(true);
+ }
+ };
+
+ const stopAutoListening = () => {
+ setIsAutoListening(false);
+ // Pass the refs here to the utility function
+ stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
+ setStatus("Click to start listening.");
+ };
+
+ const startAutoRecording = (stream) => {
+ if (mediaRecorderRef.current?.state === "recording") return;
+ mediaRecorderRef.current = new MediaRecorder(stream);
+ audioChunksRef.current = [];
+ mediaRecorderRef.current.ondataavailable = (event) => {
+ audioChunksRef.current.push(event.data);
+ };
+ mediaRecorderRef.current.onstop = async () => {
+ isRecordingRef.current = false;
+ setIsRecording(false);
+ if (audioChunksRef.current.length > 0) {
+ setIsBusy(true);
+ setStatus("Transcribing audio...");
+ const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" });
+ await processConversation(audioBlob);
+ } else {
+ setIsBusy(false);
+ setStatus("Listening for voice...");
+ }
+ };
+ mediaRecorderRef.current.start();
+ isRecordingRef.current = true;
+ setIsRecording(true);
+ setStatus("Recording...");
+ };
+
+ const stopAutoRecording = () => {
+ if (mediaRecorderRef.current?.state === "recording") {
+ mediaRecorderRef.current.stop();
+ }
+ };
+
+ const handleMicClick = () => {
+ // Correctly pass the refs to the utility function
+ stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef);
+
+ if (!isConfigured) {
+ setErrorMessage("Voice Chat requires valid configurations. Please visit Settings to set up your LLM, TTS, and STT providers.");
+ setShowErrorModal(true);
+ return;
+ }
+
+ if (isBusy) return;
+
+ if (isAutoMode) {
+ if (isAutoListening) {
+ stopAutoListening();
+ } else {
+ startAutoListening();
+ }
+ } else {
+ if (isRecording) {
+ stopManualRecording();
+ } else {
+ startManualRecording();
+ }
+ }
+ };
+
+ const handleNewSession = async () => {
+ setChatHistory([]);
+ localStorage.removeItem("sessionId_voice_chat");
+
+ setIsBusy(true);
+ setStatus("Starting new session...");
+ const prefProvider = userConfigData?.effective?.llm?.active_provider || "gemini";
+ try {
+ const newSessionId = await getSessionId("voice_chat", prefProvider, {
+ stt_provider_name: localActivePrefs.stt || userConfigData?.effective?.stt?.active_provider,
+ tts_provider_name: localActivePrefs.tts || userConfigData?.effective?.tts?.active_provider
+ });
+ setLocalActivePrefs(prev => ({ ...prev, llm: prefProvider }));
+ setSessionId(newSessionId);
+ sessionIdRef.current = newSessionId;
+ fetchTokenUsage();
+ setStatus("Click the microphone to start recording.");
+ } catch (err) {
+ console.error("Failed to start new voice session", err);
+ setStatus("Error creating new session.");
+ } finally {
+ setIsBusy(false);
+ }
+ };
+
+ const handleSwitchSession = useCallback(async (targetSessionId) => {
+ localStorage.setItem("sessionId_voice_chat", targetSessionId);
+ setSessionId(targetSessionId);
+ sessionIdRef.current = targetSessionId;
+
+ setChatHistory([]);
+ setStatus(`Loading session #${targetSessionId}...`);
+
+ try {
+ // Because we are switching sessions, we MUST sync the settings layout
+ // back to the new session's provider_name
+ try {
+ const switchedSession = await getSession(targetSessionId);
+ if (switchedSession && switchedSession.provider_name) {
+ setLocalActivePrefs(prev => ({
+ ...prev,
+ llm: switchedSession.provider_name
+ }));
+ }
+ } catch (e) { console.warn("Could not fetch switched session provider", e); }
+
+ const messagesData = await getSessionMessages(targetSessionId);
+ if (messagesData && messagesData.messages) {
+ const mappedHistoryPromises = messagesData.messages.map(async (msg) => {
+ let audioBlob = null;
+ if (msg.has_audio) {
+ try {
+ audioBlob = await fetchMessageAudio(msg.id);
+ } catch (e) {
+ console.warn(`Failed to fetch audio for message ${msg.id} during switch`, e);
+ }
+ }
+ return {
+ id: msg.id,
+ text: msg.content,
+ isUser: msg.sender === 'user',
+ timestamp: msg.created_at,
+ isFromHistory: true,
+ audioBlob: audioBlob
+ };
+ });
+ const mappedHistory = await Promise.all(mappedHistoryPromises);
+ setChatHistory(mappedHistory);
+ }
+ fetchTokenUsage();
+ setStatus(`Click the microphone to start recording.`);
+ } catch (error) {
+ console.error("Failed to switch session:", error);
+ setStatus("Failed to load session history.");
+ }
+ }, [fetchTokenUsage]);
+
+ return {
+ chatHistory,
+ status,
+ isBusy,
+ isRecording,
+ isAutoMode,
+ isAutoListening,
+ sessionId,
+ showErrorModal,
+ errorMessage,
+ tokenUsage,
+ isConfigured,
+ missingConfigs,
+ userConfigData,
+ localActivePrefs,
+ setLocalActivePrefs,
+ setIsAutoMode,
+ handleMicClick,
+ handleNewSession,
+ handleSwitchSession,
+ setShowErrorModal,
+ setErrorMessage,
+ synthesizeMessageAudio,
+ isStreamingPlaying,
+ stopStreamingPlayback
+ };
+};
+
+export default useVoiceChat;
diff --git a/frontend/src/features/voice/index.js b/frontend/src/features/voice/index.js
new file mode 100644
index 0000000..df33c1c
--- /dev/null
+++ b/frontend/src/features/voice/index.js
@@ -0,0 +1,6 @@
+// Feature entry point for voice chat.
+// Exports the primary UI and hook implementations for the voice chat feature.
+
+export { default as VoiceChatPage } from "./pages/VoiceChatPage";
+export { default as useVoiceChat } from "./hooks/useVoiceChat";
+export { default as VoiceControls } from "./components/VoiceControls";
diff --git a/frontend/src/features/voice/pages/VoiceChatPage.js b/frontend/src/features/voice/pages/VoiceChatPage.js
new file mode 100644
index 0000000..53eca38
--- /dev/null
+++ b/frontend/src/features/voice/pages/VoiceChatPage.js
@@ -0,0 +1,277 @@
+import React, { useState, useRef, useEffect } from "react";
+import { useVoiceChat, VoiceControls } from "..";
+import { ChatWindow } from "../../chat";
+import SessionSidebar from "../../../components/SessionSidebar";
+import { updateSession } from "../../../services/apiService";
+
+const VoiceChatPage = () => {
+ const chatContainerRef = useRef(null);
+ const [showConfigModal, setShowConfigModal] = useState(false);
+ const [savingConfig, setSavingConfig] = useState(false);
+ const [sidebarRefreshTick, setSidebarRefreshTick] = useState(0);
+
+ const {
+ chatHistory,
+ status,
+ isRecording,
+ isBusy,
+ isAutoMode,
+ isAutoListening,
+ showErrorModal,
+ errorMessage,
+ tokenUsage,
+ setIsAutoMode,
+ handleMicClick,
+ handleNewSession,
+ setShowErrorModal,
+ handleSwitchSession,
+ sessionId,
+ isConfigured,
+ missingConfigs,
+ userConfigData,
+ localActivePrefs,
+ setLocalActivePrefs,
+ setErrorMessage,
+ synthesizeMessageAudio,
+ isStreamingPlaying,
+ stopStreamingPlayback
+ } = useVoiceChat({ chatContainerRef });
+
+ useEffect(() => {
+ if (chatContainerRef.current) {
+ chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight;
+ }
+ }, [chatHistory]);
+
+ const toggleAutoMode = () => {
+ setIsAutoMode(!isAutoMode);
+ };
+
+ const handleSaveQuickConfig = async () => {
+ try {
+ setSavingConfig(true);
+ if (sessionId && localActivePrefs.llm) {
+ await updateSession(sessionId, { provider_name: localActivePrefs.llm });
+ setSidebarRefreshTick(t => t + 1);
+ }
+ setShowConfigModal(false);
+ } catch (e) {
+ console.error(e);
+ setErrorMessage("Failed to apply the session-local providers.");
+ setShowErrorModal(true);
+ } finally {
+ setSavingConfig(false);
+ }
+ };
+
+ return (
+
+
+
+ {/* Main content area */}
+
+
+ {/* Chat Area Box */}
+
+
+
+ {/* Box Header */}
+
+
+
+
+ Voice Chat Assistant
+ Real-time Conversational AI
+
+ {!isConfigured && (
+
+
+
+
Missing Key
+
+ {missingConfigs?.map((m, i) => - {m}
)}
+
+
+
+ )}
+
+
+
+
+
+ Token Usage
+
+
+
+
80 ? 'bg-red-500' : 'bg-indigo-500'}`}
+ style={{ width: `${Math.min(tokenUsage?.percentage || 0, 100)}%` }}
+ >
+
+
80 ? 'text-red-500' : 'text-gray-400'}`}>
+ {tokenUsage?.percentage || 0}%
+
+
+
+
+
+
+
+ {/* Chat Scroll Area */}
+
+
+
+
+ {/* Controls Wrapper */}
+
+
+
+
+
+
+
+
+ {/* Error Modal */}
+ {showErrorModal && (
+
+
+
+
Attention Required
+
{errorMessage}
+
+
+
+ )}
+
+ {/* Quick Settings Modal */}
+ {showConfigModal && (
+
+
+
+
+
+ Session Engine
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ )}
+
+ );
+};
+
+export default VoiceChatPage;
\ No newline at end of file
diff --git a/frontend/src/hooks/useVoiceChat.js b/frontend/src/hooks/useVoiceChat.js
deleted file mode 100644
index 9f13faa..0000000
--- a/frontend/src/hooks/useVoiceChat.js
+++ /dev/null
@@ -1,791 +0,0 @@
-// src/hooks/useVoiceChat.js
-
-// This file is a custom React hook that contains all the stateful logic
-// and side effects for the voice chat application.
-
-import { useState, useRef, useEffect, useCallback } from "react";
-import {
- createSession,
- transcribeAudio,
- chatWithAI,
- streamSpeech,
- getSessionMessages,
- getSessionTokenStatus,
- getUserConfig,
- getSession,
- uploadMessageAudio,
- fetchMessageAudio
-} from "../services/apiService";
-import { getSessionId } from "../services/websocket";
-import {
- stopAllPlayingAudio,
- stopAllMediaStreams,
- resampleBuffer,
- encodeWAV,
-} from "../services/audioUtils";
-
-// Constants for Voice Activity Detection and timing
-const VAD_THRESHOLD = 0.01;
-const VAD_SILENCE_DURATION = 2500;
-const MINIMUM_AUDIO_DURATION_MS = 500;
-const AUTO_MODE_COOLDOWN_MS = 3000;
-
-const useVoiceChat = ({ chatContainerRef }) => {
- const [chatHistory, setChatHistory] = useState([]);
- const [status, setStatus] = useState("Click the microphone to start recording.");
- const [isBusy, setIsBusy] = useState(false);
- const [isRecording, setIsRecording] = useState(false);
- const [showErrorModal, setShowErrorModal] = useState(false);
- const [errorMessage, setErrorMessage] = useState("");
- const [sessionId, setSessionId] = useState(null);
- const [isAutoMode, setIsAutoMode] = useState(false);
- const [isAutoListening, setIsAutoListening] = useState(false);
- const [tokenUsage, setTokenUsage] = useState({ token_count: 0, token_limit: 0, percentage: 0 });
- const [isConfigured, setIsConfigured] = useState(true);
- const [missingConfigs, setMissingConfigs] = useState([]);
- const [userConfigData, setUserConfigData] = useState(null);
- const [localActivePrefs, setLocalActivePrefs] = useState({ llm: '', tts: '', stt: '' });
-
- // All refs must be declared here, inside the custom hook.
- const sessionIdRef = useRef(null);
- const mediaRecorderRef = useRef(null);
- const audioChunksRef = useRef([]);
- const audioContextRef = useRef(null);
- const playbackTimeRef = useRef(0);
- const isRecordingRef = useRef(false);
- const isBusyRef = useRef(false);
- const playingSourcesRef = useRef([]);
- const vadStreamRef = useRef(null);
- const scriptProcessorRef = useRef(null);
- const silenceTimeoutRef = useRef(null);
- const lastRequestTimeRef = useRef(0);
- const streamRef = useRef(null);
-
- const [isStreamingPlaying, setIsStreamingPlaying] = useState(false);
-
- // Helper to keep ref and state in sync
- const setBusy = (val) => {
- setIsBusy(val);
- isBusyRef.current = val;
- };
-
- /**
- * Stops any currently playing streaming audio.
- */
- const stopStreamingPlayback = useCallback(() => {
- stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef);
- setIsStreamingPlaying(false);
- setBusy(false);
- }, []);
-
- const fetchTokenUsage = useCallback(async () => {
- if (!sessionIdRef.current) return;
- try {
- const usage = await getSessionTokenStatus(sessionIdRef.current);
- setTokenUsage(usage);
- } catch (err) {
- console.warn("Failed to fetch voice token usage", err);
- }
- }, []);
-
- // --- Initial Session Creation Effect ---
- useEffect(() => {
- const startSession = async () => {
- setBusy(true);
- setStatus("Loading chat session...");
- try {
- let configDataToUse = null;
- let providerToUse = "gemini";
- try {
- configDataToUse = await getUserConfig();
- setUserConfigData(configDataToUse);
- if (configDataToUse.effective?.llm?.active_provider) {
- providerToUse = configDataToUse.effective.llm.active_provider;
- }
- } catch (e) {
- console.warn("Could not load user config for Voice", e);
- }
-
- const currentSessionId = await getSessionId("voice_chat", providerToUse);
- setSessionId(currentSessionId);
- sessionIdRef.current = currentSessionId;
-
- // Try to load chat history
- try {
- const messagesData = await getSessionMessages(currentSessionId);
- if (messagesData && messagesData.messages && messagesData.messages.length > 0) {
- const formattedHistoryPromises = messagesData.messages.map(async (msg) => {
- let audioBlob = null;
- if (msg.has_audio) {
- try {
- audioBlob = await fetchMessageAudio(msg.id);
- } catch (e) {
- console.warn(`Failed to fetch audio for message ${msg.id}`, e);
- }
- }
- return {
- id: msg.id,
- isUser: msg.sender === "user",
- text: msg.content,
- reasoning: msg.reasoning_content,
- timestamp: msg.created_at,
-
- isFromHistory: true,
- audioBlob: audioBlob
- };
- });
- const formattedHistory = await Promise.all(formattedHistoryPromises);
- setChatHistory(formattedHistory);
- }
- } catch (historyErr) {
- console.warn("Failed to load voice chat history", historyErr);
- }
-
- // Load initial tokens
- await fetchTokenUsage();
-
- // Check if configuration is fully populated
- try {
- const eff = configDataToUse?.effective || {};
-
- let sessionLlmProvider = providerToUse;
- try {
- const currentSession = await getSession(currentSessionId);
- if (currentSession && currentSession.provider_name) {
- sessionLlmProvider = currentSession.provider_name;
- }
- } catch (sessionErr) {
- console.warn("Could not fetch session details", sessionErr);
- }
-
- setLocalActivePrefs({
- llm: sessionLlmProvider,
- tts: eff.tts?.active_provider || '',
- stt: eff.stt?.active_provider || ''
- });
-
- const missing = [];
-
- const llmProviders = eff.llm?.providers || {};
- const hasLLMKey = Object.values(llmProviders).some(p => p.api_key && p.api_key !== 'None');
- if (!hasLLMKey) missing.push("Language Model (LLM) API Key");
-
- const ttsProviders = eff.tts?.providers || {};
- const hasTTSKey = Object.values(ttsProviders).some(p => p.api_key && p.api_key !== 'None');
- if (!hasTTSKey) missing.push("Text-to-Speech (TTS) API Key");
-
- const sttProviders = eff.stt?.providers || {};
- const hasSTTKey = Object.values(sttProviders).some(p => p.api_key && p.api_key !== 'None');
- if (!hasSTTKey) missing.push("Speech-to-Text (STT) API Key");
-
- if (missing.length > 0) {
- setIsConfigured(false);
- setMissingConfigs(missing);
- setStatus(`Feature not usable. Missing: ${missing.join(', ')}`);
- } else {
- setIsConfigured(true);
- setMissingConfigs([]);
- setStatus("Click the microphone to start recording.");
- }
- } catch (configErr) {
- console.warn("Failed to load user config", configErr);
- setIsConfigured(false);
- setMissingConfigs(["Failed to load complete configuration"]);
- }
-
- console.log(`Voice Session loaded with ID: ${currentSessionId}`);
- } catch (err) {
- console.error("Error creating session:", err);
- setStatus(`Error: Could not start session. ${err.message}`);
- setErrorMessage(`Failed to create a chat session: ${err.message}`);
- setShowErrorModal(true);
- } finally {
- setIsBusy(false);
- }
- };
- startSession();
-
- return () => {
- // Pass the refs to the utility function here
- stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
- stopStreamingPlayback();
- };
- }, [stopStreamingPlayback]);
-
- // New useEffect hook to automatically scroll to the bottom of the chat history
- // The fix: `chatContainerRef` is now included in the dependency array.
- useEffect(() => {
- if (chatContainerRef.current) {
- chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight;
- }
- }, [chatHistory, chatContainerRef]);
-
- const addMessage = (text, isUser, id = null) => {
- setChatHistory((prevHistory) => [...prevHistory, {
- text,
- isUser,
- id,
- timestamp: new Date().toISOString()
- }]);
- };
-
- /**
- * Plays a stream of audio chunks using the Web Audio API by fetching them from the API.
- * This is the orchestrator that uses the stateless streamSpeech API function.
- * @param {string} text - The text to be synthesized by the TTS service.
- * @param {number} messageId - The ID of the message to associated the audio with.
- */
- const playStreamingAudio = async (text, messageId = null) => {
- setBusy(true);
- setIsStreamingPlaying(true);
- setStatus("Streaming audio...");
-
- // Stop any existing playback
- stopStreamingPlayback();
- setBusy(true); // stopStreamingPlayback sets it to false, we want it true during this process
-
- // Track chunks to store in history
- const accumulatedChunks = [];
-
- try {
- if (!audioContextRef.current) {
- audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)();
- playbackTimeRef.current = audioContextRef.current.currentTime;
- }
-
- const audioContext = audioContextRef.current;
-
- const onChunkReceived = (rawFloat32Data, totalChunks, currentChunkIndex) => {
- // Collect for storage
- accumulatedChunks.push(new Float32Array(rawFloat32Data));
-
- // Update UI progress
- if (totalChunks > 0) {
- const progress = Math.min(Math.round((currentChunkIndex / totalChunks) * 100), 100);
- setChatHistory(prev => {
- const next = [...prev];
- for (let i = next.length - 1; i >= 0; i--) {
- if (!next[i].isUser && !next[i].audioBlob) {
- next[i].audioProgress = progress;
- break;
- }
- }
- return next;
- });
- }
- const float32Resampled = resampleBuffer(
- rawFloat32Data,
- 24000,
- audioContext.sampleRate
- );
- const audioBuffer = audioContext.createBuffer(
- 1,
- float32Resampled.length,
- audioContext.sampleRate
- );
- audioBuffer.copyToChannel(float32Resampled, 0);
-
- // Apply a very short fade-in and fade-out (2ms) to eliminate "clicks" at segment boundaries
- const fadeSamps = Math.floor(audioContext.sampleRate * 0.002);
- const chanData = audioBuffer.getChannelData(0);
- if (chanData.length > fadeSamps * 2) {
- for (let i = 0; i < fadeSamps; i++) {
- chanData[i] *= (i / fadeSamps);
- chanData[chanData.length - 1 - i] *= (i / fadeSamps);
- }
- }
-
- const source = audioContext.createBufferSource();
- source.buffer = audioBuffer;
- source.connect(audioContext.destination);
-
- const currentTime = audioContext.currentTime;
- const startTime = Math.max(playbackTimeRef.current, currentTime);
-
- source.start(startTime);
- playbackTimeRef.current = startTime + audioBuffer.duration;
-
- playingSourcesRef.current.push(source);
- source.onended = () => {
- playingSourcesRef.current = playingSourcesRef.current.filter(
- (s) => s !== source
- );
- };
- };
-
- const onStreamDone = async () => {
- console.log("TTS Stream complete.");
- setIsStreamingPlaying(false);
- if (accumulatedChunks.length > 0) {
- // Concatenate all chunks and save the blob
- const totalLen = accumulatedChunks.reduce((acc, c) => acc + c.length, 0);
- const result = new Float32Array(totalLen);
- let offset = 0;
- for (const c of accumulatedChunks) {
- result.set(c, offset);
- offset += c.length;
- }
- // resample to standard 44.1k for download/blob stability
- const finalPcm = resampleBuffer(result, 24000, 44100);
- const wavBlob = encodeWAV(finalPcm, 44100);
-
- // Upload to persistent storage if messageId is available
- if (messageId) {
- try {
- console.log(`Uploading audio for message ${messageId}...`);
- await uploadMessageAudio(messageId, wavBlob);
- } catch (uploadErr) {
- console.warn("Failed to upload persistent audio", uploadErr);
- }
- }
-
- // Post-update: find the last AI message and attach this blob
- setChatHistory(prev => {
- const next = [...prev];
- // Find the latest assistant message that matches this text (or just the latest)
- for (let i = next.length - 1; i >= 0; i--) {
- if (!next[i].isUser && !next[i].audioBlob) {
- next[i].audioBlob = wavBlob;
- if (messageId) next[i].id = messageId;
- break;
- }
- }
- return next;
- });
- }
- };
-
- if (!text || !text.trim()) {
- console.warn("No text to synthesize, skipping audio playback.");
- return;
- }
-
- await streamSpeech(text, onChunkReceived, onStreamDone, localActivePrefs.tts);
-
- } catch (err) {
- console.error("Failed to stream speech:", err);
- setIsStreamingPlaying(false);
- setStatus(`Error: Failed to stream speech. ${err.message}`);
- setErrorMessage(`Failed to stream speech: ${err.message}`);
- setShowErrorModal(true);
- } finally {
- setBusy(false);
- lastRequestTimeRef.current = Date.now();
- if (isAutoMode && isAutoListening) {
- setStatus("Listening for voice...");
- } else if (!isAutoMode) {
- setStatus("Click the microphone to start recording.");
- } else {
- setStatus("Click to start listening.");
- }
- }
- };
-
- /**
- * Specifically for manual replay/synthesis of any message (including history)
- */
- const synthesizeMessageAudio = async (index, text) => {
- if (isBusy) return;
- const accumulatedChunks = [];
-
- if (chatHistory[index]?.audioBlob) return;
-
- setIsBusy(true);
- try {
- if (!audioContextRef.current) {
- audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)();
- playbackTimeRef.current = audioContextRef.current.currentTime;
- }
- const audioContext = audioContextRef.current;
-
- const onData = (rawFloat32Data, total, current) => {
- accumulatedChunks.push(new Float32Array(rawFloat32Data));
- if (total > 0) {
- const progress = Math.min(Math.round((current / total) * 100), 100);
- setChatHistory(prev => {
- const next = [...prev];
- if (next[index]) next[index].audioProgress = progress;
- return next;
- });
- }
-
- const float32Resampled = resampleBuffer(rawFloat32Data, 24000, audioContext.sampleRate);
- const audioBuffer = audioContext.createBuffer(1, float32Resampled.length, audioContext.sampleRate);
- audioBuffer.copyToChannel(float32Resampled, 0);
- const source = audioContext.createBufferSource();
- source.buffer = audioBuffer;
- source.connect(audioContext.destination);
- const startTime = Math.max(playbackTimeRef.current, audioContext.currentTime);
- source.start(startTime);
- playbackTimeRef.current = startTime + audioBuffer.duration;
- playingSourcesRef.current.push(source);
- };
-
- const onDone = async () => {
- if (accumulatedChunks.length > 0) {
- const totalLen = accumulatedChunks.reduce((acc, c) => acc + c.length, 0);
- const result = new Float32Array(totalLen);
- let offset = 0;
- for (const c of accumulatedChunks) {
- result.set(c, offset);
- offset += c.length;
- }
- const finalPcm = resampleBuffer(result, 24000, 44100);
- const wavBlob = encodeWAV(finalPcm, 44100);
-
- const messageId = chatHistory[index]?.id;
- if (messageId) {
- try {
- await uploadMessageAudio(messageId, wavBlob);
- } catch (e) { console.warn("Upload failed during manual synthesis", e); }
- }
-
- setChatHistory(prev => {
- const next = [...prev];
- if (next[index]) next[index].audioBlob = wavBlob;
- return next;
- });
- }
- };
-
- if (!text || !text.trim()) return;
-
- await streamSpeech(text, onData, onDone, localActivePrefs.tts);
- } catch (err) {
- console.error("Manual synthesis failed", err);
- } finally {
- setIsBusy(false);
- }
- };
-
- const processConversation = async (audioBlob) => {
- setBusy(true);
- console.log("Processing conversation...");
- try {
- const audioDuration = audioBlob.size / (48000 * 2 * 1) * 1000;
- if (audioDuration < MINIMUM_AUDIO_DURATION_MS) {
- const msg = "Your recording was too short. Please speak for at least half a second.";
- console.log(`Audio too short (${audioDuration.toFixed(2)}ms), skipping.`);
- setStatus(msg);
- if (!isAutoMode) {
- setErrorMessage(msg);
- setShowErrorModal(true);
- }
- lastRequestTimeRef.current = Date.now();
- return;
- }
- if (audioBlob.size === 0) {
- const msg = "Recording stopped, but no audio was captured. Please try again.";
- console.warn("Audio blob is empty, skipping STT API call.");
- setStatus(msg);
- if (!isAutoMode) {
- setErrorMessage(msg);
- setShowErrorModal(true);
- }
- lastRequestTimeRef.current = Date.now();
- return;
- }
-
- setStatus("Transcribing audio...");
- const userText = await transcribeAudio(audioBlob, localActivePrefs.stt);
- addMessage(userText, true);
-
- setStatus("AI is thinking...");
- const aiResponse = await chatWithAI(sessionId, userText, localActivePrefs.llm || "gemini");
- addMessage(aiResponse.answer, false, aiResponse.message_id);
-
- fetchTokenUsage();
- await playStreamingAudio(aiResponse.answer, aiResponse.message_id);
- } catch (error) {
- console.error("Conversation processing failed:", error);
- setStatus(`Error: ${error.message}`);
- setErrorMessage(`An error occurred: ${error.message}`);
- setShowErrorModal(true);
- } finally {
- setBusy(false);
- lastRequestTimeRef.current = Date.now();
- // This is the main correction: only stop streams if not in auto-listening mode
- if (!isAutoMode) {
- setStatus("Click the microphone to start recording.");
- stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
- } else if (isAutoMode && isAutoListening) {
- setStatus("Listening for voice...");
- } else {
- setStatus("Click to start listening.");
- }
- }
- };
-
- const startManualRecording = async () => {
- if (isRecording) return;
-
- try {
- if (!sessionId) {
- setErrorMessage("Please wait for the chat session to be initialized.");
- setShowErrorModal(true);
- return;
- }
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
- streamRef.current = stream;
- mediaRecorderRef.current = new MediaRecorder(stream);
- mediaRecorderRef.current.start();
- audioChunksRef.current = [];
-
- mediaRecorderRef.current.ondataavailable = (event) => {
- audioChunksRef.current.push(event.data);
- };
-
- mediaRecorderRef.current.onstop = async () => {
- if (streamRef.current) {
- streamRef.current.getTracks().forEach(track => track.stop());
- streamRef.current = null;
- }
- mediaRecorderRef.current = null;
- const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" });
- await processConversation(audioBlob);
- };
- setIsRecording(true);
- isRecordingRef.current = true;
- setStatus("Recording... Click to stop.");
- } catch (err) {
- console.error("Error accessing microphone:", err);
- setStatus("Error: Cannot access microphone.");
- setErrorMessage("Microphone access has been denied. Please enable it.");
- setShowErrorModal(true);
- }
- };
-
- const stopManualRecording = () => {
- if (mediaRecorderRef.current?.state === "recording") {
- setBusy(true);
- setIsRecording(false);
- mediaRecorderRef.current.stop();
- }
- };
-
- const startAutoListening = async () => {
- try {
- if (!sessionId) {
- setErrorMessage("Please wait for the chat session to be initialized.");
- setShowErrorModal(true);
- return;
- }
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
- vadStreamRef.current = stream;
- const audioContext = new (window.AudioContext || window.webkitAudioContext)();
- const source = audioContext.createMediaStreamSource(stream);
- const bufferSize = 4096;
- const scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
- scriptProcessorRef.current = scriptProcessor;
- source.connect(scriptProcessor);
- scriptProcessor.connect(audioContext.destination);
-
- scriptProcessor.onaudioprocess = (event) => {
- const inputBuffer = event.inputBuffer.getChannelData(0);
- let sum = 0.0;
- for (let i = 0; i < inputBuffer.length; i++) {
- sum += inputBuffer[i] * inputBuffer[i];
- }
- const volume = Math.sqrt(sum / inputBuffer.length);
- const isVoiceDetected = volume > VAD_THRESHOLD;
- const timeSinceLastRequest = Date.now() - lastRequestTimeRef.current;
- const isCooldownPassed = timeSinceLastRequest > AUTO_MODE_COOLDOWN_MS;
-
- if (isVoiceDetected && !isBusyRef.current) {
- if (silenceTimeoutRef.current) {
- clearTimeout(silenceTimeoutRef.current);
- silenceTimeoutRef.current = null;
- }
- if (!isRecordingRef.current && isCooldownPassed) {
- startAutoRecording(stream);
- }
- } else if (isRecordingRef.current) {
- if (!silenceTimeoutRef.current) {
- silenceTimeoutRef.current = setTimeout(() => {
- stopAutoRecording();
- }, VAD_SILENCE_DURATION);
- }
- }
- };
- setIsAutoListening(true);
- setStatus("Listening for voice...");
- } catch (err) {
- console.error("Error accessing microphone for VAD:", err);
- setStatus("Error: Cannot access microphone.");
- setErrorMessage("Microphone access has been denied. Please enable it.");
- setShowErrorModal(true);
- }
- };
-
- const stopAutoListening = () => {
- setIsAutoListening(false);
- // Pass the refs here to the utility function
- stopAllMediaStreams(vadStreamRef, mediaRecorderRef, scriptProcessorRef, streamRef);
- setStatus("Click to start listening.");
- };
-
- const startAutoRecording = (stream) => {
- if (mediaRecorderRef.current?.state === "recording") return;
- mediaRecorderRef.current = new MediaRecorder(stream);
- audioChunksRef.current = [];
- mediaRecorderRef.current.ondataavailable = (event) => {
- audioChunksRef.current.push(event.data);
- };
- mediaRecorderRef.current.onstop = async () => {
- isRecordingRef.current = false;
- setIsRecording(false);
- if (audioChunksRef.current.length > 0) {
- setIsBusy(true);
- setStatus("Transcribing audio...");
- const audioBlob = new Blob(audioChunksRef.current, { type: "audio/wav" });
- await processConversation(audioBlob);
- } else {
- setIsBusy(false);
- setStatus("Listening for voice...");
- }
- };
- mediaRecorderRef.current.start();
- isRecordingRef.current = true;
- setIsRecording(true);
- setStatus("Recording...");
- };
-
- const stopAutoRecording = () => {
- if (mediaRecorderRef.current?.state === "recording") {
- mediaRecorderRef.current.stop();
- }
- };
-
- const handleMicClick = () => {
- // Correctly pass the refs to the utility function
- stopAllPlayingAudio(playingSourcesRef, audioContextRef, playbackTimeRef);
-
- if (!isConfigured) {
- setErrorMessage("Voice Chat requires valid configurations. Please visit Settings to set up your LLM, TTS, and STT providers.");
- setShowErrorModal(true);
- return;
- }
-
- if (isBusy) return;
-
- if (isAutoMode) {
- if (isAutoListening) {
- stopAutoListening();
- } else {
- startAutoListening();
- }
- } else {
- if (isRecording) {
- stopManualRecording();
- } else {
- startManualRecording();
- }
- }
- };
-
- const handleNewSession = async () => {
- setChatHistory([]);
- localStorage.removeItem("sessionId_voice_chat");
-
- setIsBusy(true);
- setStatus("Starting new session...");
- const prefProvider = userConfigData?.effective?.llm?.active_provider || "gemini";
- try {
- const newSessionId = await getSessionId("voice_chat", prefProvider, {
- stt_provider_name: localActivePrefs.stt || userConfigData?.effective?.stt?.active_provider,
- tts_provider_name: localActivePrefs.tts || userConfigData?.effective?.tts?.active_provider
- });
- setLocalActivePrefs(prev => ({ ...prev, llm: prefProvider }));
- setSessionId(newSessionId);
- sessionIdRef.current = newSessionId;
- fetchTokenUsage();
- setStatus("Click the microphone to start recording.");
- } catch (err) {
- console.error("Failed to start new voice session", err);
- setStatus("Error creating new session.");
- } finally {
- setIsBusy(false);
- }
- };
-
- const handleSwitchSession = useCallback(async (targetSessionId) => {
- localStorage.setItem("sessionId_voice_chat", targetSessionId);
- setSessionId(targetSessionId);
- sessionIdRef.current = targetSessionId;
-
- setChatHistory([]);
- setStatus(`Loading session #${targetSessionId}...`);
-
- try {
- // Because we are switching sessions, we MUST sync the settings layout
- // back to the new session's provider_name
- try {
- const switchedSession = await getSession(targetSessionId);
- if (switchedSession && switchedSession.provider_name) {
- setLocalActivePrefs(prev => ({
- ...prev,
- llm: switchedSession.provider_name
- }));
- }
- } catch (e) { console.warn("Could not fetch switched session provider", e); }
-
- const messagesData = await getSessionMessages(targetSessionId);
- if (messagesData && messagesData.messages) {
- const mappedHistoryPromises = messagesData.messages.map(async (msg) => {
- let audioBlob = null;
- if (msg.has_audio) {
- try {
- audioBlob = await fetchMessageAudio(msg.id);
- } catch (e) {
- console.warn(`Failed to fetch audio for message ${msg.id} during switch`, e);
- }
- }
- return {
- id: msg.id,
- text: msg.content,
- isUser: msg.sender === 'user',
- timestamp: msg.created_at,
- isFromHistory: true,
- audioBlob: audioBlob
- };
- });
- const mappedHistory = await Promise.all(mappedHistoryPromises);
- setChatHistory(mappedHistory);
- }
- fetchTokenUsage();
- setStatus(`Click the microphone to start recording.`);
- } catch (error) {
- console.error("Failed to switch session:", error);
- setStatus("Failed to load session history.");
- }
- }, [fetchTokenUsage]);
-
- return {
- chatHistory,
- status,
- isBusy,
- isRecording,
- isAutoMode,
- isAutoListening,
- sessionId,
- showErrorModal,
- errorMessage,
- tokenUsage,
- isConfigured,
- missingConfigs,
- userConfigData,
- localActivePrefs,
- setLocalActivePrefs,
- setIsAutoMode,
- handleMicClick,
- handleNewSession,
- handleSwitchSession,
- setShowErrorModal,
- setErrorMessage,
- synthesizeMessageAudio,
- isStreamingPlaying,
- stopStreamingPlayback
- };
-};
-
-export default useVoiceChat;
diff --git a/frontend/src/pages/VoiceChatPage.js b/frontend/src/pages/VoiceChatPage.js
deleted file mode 100644
index 8a98adf..0000000
--- a/frontend/src/pages/VoiceChatPage.js
+++ /dev/null
@@ -1,278 +0,0 @@
-import React, { useState, useRef, useEffect } from "react";
-import useVoiceChat from "../hooks/useVoiceChat";
-import { ChatWindow } from "../features/chat";
-import Controls from "../components/VoiceControls";
-import SessionSidebar from "../components/SessionSidebar";
-import { updateSession } from "../services/apiService";
-
-const VoiceChatPage = () => {
- const chatContainerRef = useRef(null);
- const [showConfigModal, setShowConfigModal] = useState(false);
- const [savingConfig, setSavingConfig] = useState(false);
- const [sidebarRefreshTick, setSidebarRefreshTick] = useState(0);
-
- const {
- chatHistory,
- status,
- isRecording,
- isBusy,
- isAutoMode,
- isAutoListening,
- showErrorModal,
- errorMessage,
- tokenUsage,
- setIsAutoMode,
- handleMicClick,
- handleNewSession,
- setShowErrorModal,
- handleSwitchSession,
- sessionId,
- isConfigured,
- missingConfigs,
- userConfigData,
- localActivePrefs,
- setLocalActivePrefs,
- setErrorMessage,
- synthesizeMessageAudio,
- isStreamingPlaying,
- stopStreamingPlayback
- } = useVoiceChat({ chatContainerRef });
-
- useEffect(() => {
- if (chatContainerRef.current) {
- chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight;
- }
- }, [chatHistory]);
-
- const toggleAutoMode = () => {
- setIsAutoMode(!isAutoMode);
- };
-
- const handleSaveQuickConfig = async () => {
- try {
- setSavingConfig(true);
- if (sessionId && localActivePrefs.llm) {
- await updateSession(sessionId, { provider_name: localActivePrefs.llm });
- setSidebarRefreshTick(t => t + 1);
- }
- setShowConfigModal(false);
- } catch (e) {
- console.error(e);
- setErrorMessage("Failed to apply the session-local providers.");
- setShowErrorModal(true);
- } finally {
- setSavingConfig(false);
- }
- };
-
- return (
-
-
-
- {/* Main content area */}
-
-
- {/* Chat Area Box */}
-
-
-
- {/* Box Header */}
-
-
-
-
- Voice Chat Assistant
- Real-time Conversational AI
-
- {!isConfigured && (
-
-
-
-
Missing Key
-
- {missingConfigs?.map((m, i) => - {m}
)}
-
-
-
- )}
-
-
-
-
-
- Token Usage
-
-
-
-
80 ? 'bg-red-500' : 'bg-indigo-500'}`}
- style={{ width: `${Math.min(tokenUsage?.percentage || 0, 100)}%` }}
- >
-
-
80 ? 'text-red-500' : 'text-gray-400'}`}>
- {tokenUsage?.percentage || 0}%
-
-
-
-
-
-
-
- {/* Chat Scroll Area */}
-
-
-
-
- {/* Controls Wrapper */}
-
-
-
-
-
-
-
-
- {/* Error Modal */}
- {showErrorModal && (
-
-
-
-
Attention Required
-
{errorMessage}
-
-
-
- )}
-
- {/* Quick Settings Modal */}
- {showConfigModal && (
-
-
-
-
-
- Session Engine
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- )}
-
- );
-};
-
-export default VoiceChatPage;
\ No newline at end of file