<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>AI Voice Chat</title> <!-- Use Tailwind CSS for modern styling --> <script src="https://cdn.tailwindcss.com"></script> <style> body { font-family: 'Inter', sans-serif; background-color: #f3f4f6; } .chat-container { height: calc(100vh - 120px); overflow-y: auto; } /* Custom scrollbar for a cleaner look */ .chat-container::-webkit-scrollbar { width: 8px; } .chat-container::-webkit-scrollbar-track { background: #e5e7eb; } .chat-container::-webkit-scrollbar-thumb { background-color: #9ca3af; border-radius: 4px; } .message-bubble { max-width: 80%; padding: 10px 15px; border-radius: 20px; margin-bottom: 15px; word-wrap: break-word; } </style> </head> <body class="flex items-center justify-center min-h-screen"> <div class="bg-white p-8 rounded-2xl shadow-xl w-full max-w-lg mx-4 flex flex-col h-[90vh]"> <h1 class="text-3xl font-bold text-center mb-6 text-gray-800">AI Voice Chat</h1> <!-- Chat History Window --> <div id="chat-history" class="chat-container bg-gray-100 p-4 rounded-xl flex-grow mb-6 space-y-4"> <!-- Messages will be injected here --> </div> <!-- Controls Section --> <div class="flex flex-col items-center space-y-4"> <div id="status" class="text-center text-sm font-medium text-gray-500 min-h-[20px]"> Click the microphone to start recording. </div> <button id="mic-button" class="w-20 h-20 bg-indigo-600 rounded-full flex items-center justify-center text-white text-4xl shadow-lg transition-all duration-300 ease-in-out hover:bg-indigo-700 active:scale-95" aria-label="Start recording"> <svg id="mic-icon" class="w-8 h-8" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg"> <path fill-rule="evenodd" d="M10 9a3 3 0 100-6 3 3 0 000 6zm-7 9a7 7 0 1114 0H3z" clip-rule="evenodd"></path> </svg> </button> </div> </div> <script> // --- Configuration --- const STT_ENDPOINT = "/stt/transcribe"; const LLM_ENDPOINT = "/chat"; // Replace with your actual LLM chat endpoint const TTS_ENDPOINT = "/speech"; // --- DOM Elements --- const micButton = document.getElementById('mic-button'); const micIcon = document.getElementById('mic-icon'); const chatHistory = document.getElementById('chat-history'); const statusElement = document.getElementById('status'); // --- State Variables --- let mediaRecorder; let audioChunks = []; let isRecording = false; // --- Helper Functions for UI --- function addMessage(text, isUser = true) { const messageDiv = document.createElement('div'); messageDiv.textContent = text; messageDiv.className = `message-bubble ${isUser ? 'bg-indigo-500 text-white self-end' : 'bg-gray-300 text-gray-800 self-start'}`; chatHistory.appendChild(messageDiv); chatHistory.scrollTop = chatHistory.scrollHeight; // Auto-scroll to the latest message } function setStatus(message) { statusElement.textContent = message; } function toggleRecordingUI(recording) { isRecording = recording; micIcon.innerHTML = recording ? '<svg class="w-8 h-8 animate-pulse" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg"><path d="M10 18a8 8 0 100-16 8 8 0 000 16zM8 9a1 1 0 011-1h2a1 1 0 110 2H9a1 1 0 01-1-1zm0 4a1 1 0 011-1h2a1 1 0 110 2H9a1 1 0 01-1-1z"></path></svg>' : '<svg class="w-8 h-8" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg"><path fill-rule="evenodd" d="M10 9a3 3 0 100-6 3 3 0 000 6zm-7 9a7 7 0 1114 0H3z" clip-rule="evenodd"></path></svg>'; micButton.classList.toggle('bg-red-500', recording); micButton.classList.toggle('hover:bg-red-600', recording); micButton.classList.toggle('bg-indigo-600', !recording); micButton.classList.toggle('hover:bg-indigo-700', !recording); } // --- Main Chat Logic --- async function startRecording() { try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); mediaRecorder = new MediaRecorder(stream); mediaRecorder.start(); audioChunks = []; mediaRecorder.ondataavailable = event => { audioChunks.push(event.data); }; mediaRecorder.onstop = async () => { toggleRecordingUI(false); setStatus("Processing..."); const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); processConversation(audioBlob); }; toggleRecordingUI(true); setStatus("Recording... Click to stop."); } catch (err) { console.error('Error accessing microphone:', err); setStatus("Error: Cannot access microphone."); alert('Please allow microphone access to use this feature.'); } } async function processConversation(audioBlob) { try { // Step 1: Speech-to-Text (STT) const formData = new FormData(); formData.append('audio_file', audioBlob, 'audio.wav'); const sttResponse = await fetch(STT_ENDPOINT, { method: 'POST', body: formData }); if (!sttResponse.ok) throw new Error('STT API failed'); const sttResult = await sttResponse.json(); const userText = sttResult.transcript; addMessage(userText, true); // Step 2: Text-to-Text (LLM) // This is a placeholder for your actual LLM chat endpoint const llmResponse = await fetch(LLM_ENDPOINT, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ prompt: userText }) }); if (!llmResponse.ok) throw new Error('LLM API failed'); const llmResult = await llmResponse.json(); const aiText = llmResult.text; addMessage(aiText, false); // Step 3: Text-to-Speech (TTS) const ttsResponse = await fetch(TTS_ENDPOINT, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text: aiText }) }); if (!ttsResponse.ok) throw new Error('TTS API failed'); const audioData = await ttsResponse.arrayBuffer(); // Step 4: Play the audio await playAudio(audioData); setStatus("Click the microphone to start recording."); } catch (error) { console.error("Conversation processing failed:", error); setStatus(`Error: ${error.message}`); alert(`An error occurred: ${error.message}`); } } async function playAudio(audioData) { const audioContext = new (window.AudioContext || window.webkitAudioContext)(); const audioBuffer = await audioContext.decodeAudioData(audioData); const source = audioContext.createBufferSource(); source.buffer = audioBuffer; source.connect(audioContext.destination); source.start(); } // --- Event Listener --- micButton.addEventListener('click', () => { if (isRecording) { mediaRecorder.stop(); } else { startRecording(); } }); // Add a default welcome message document.addEventListener('DOMContentLoaded', () => { addMessage("Hello! I'm an AI assistant. How can I help you today?", false); }); </script> </body> </html>