<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AI Voice Chat</title>
    <!-- Tailwind CSS via CDN for styling (fine for a demo; not recommended for production builds) -->
    <script src="https://cdn.tailwindcss.com"></script>
    <!-- Load the Inter font referenced in the styles below -->
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
    <style>
        body {
            font-family: 'Inter', sans-serif;
            background-color: #f3f4f6;
        }
        .chat-container {
            height: calc(100vh - 120px);
            overflow-y: auto;
        }
        /* Custom scrollbar for a cleaner look */
        .chat-container::-webkit-scrollbar {
            width: 8px;
        }
        .chat-container::-webkit-scrollbar-track {
            background: #e5e7eb;
        }
        .chat-container::-webkit-scrollbar-thumb {
            background-color: #9ca3af;
            border-radius: 4px;
        }
        .message-bubble {
            max-width: 80%;
            padding: 10px 15px;
            border-radius: 20px;
            margin-bottom: 15px;
            overflow-wrap: break-word;
        }
    </style>
</head>
<body class="flex items-center justify-center min-h-screen">
    <div class="bg-white p-8 rounded-2xl shadow-xl w-full max-w-lg mx-4 flex flex-col h-[90vh]">
        <h1 class="text-3xl font-bold text-center mb-6 text-gray-800">AI Voice Chat</h1>
        
        <!-- Chat History Window -->
        <div id="chat-history" class="chat-container bg-gray-100 p-4 rounded-xl flex-grow mb-6 space-y-4">
            <!-- Messages will be injected here -->
        </div>

        <!-- Controls Section -->
        <div class="flex flex-col items-center space-y-4">
            <div id="status" class="text-center text-sm font-medium text-gray-500 min-h-[20px]">
                Click the microphone to start recording.
            </div>
            
            <button id="mic-button" 
                    class="w-20 h-20 bg-indigo-600 rounded-full flex items-center justify-center 
                           text-white text-4xl shadow-lg transition-all duration-300 ease-in-out 
                           hover:bg-indigo-700 active:scale-95"
                    aria-label="Start recording">
                <svg id="mic-icon" class="w-8 h-8" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
                    <path fill-rule="evenodd" d="M10 9a3 3 0 100-6 3 3 0 000 6zm-7 9a7 7 0 1114 0H3z" clip-rule="evenodd"></path>
                </svg>
            </button>
        </div>
    </div>

    <script>
        // --- Configuration ---
        const STT_ENDPOINT = "/stt/transcribe";
        const LLM_ENDPOINT = "/chat"; // Replace with your actual LLM chat endpoint
        const TTS_ENDPOINT = "/speech";
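        // Assumed request/response shapes, inferred from the calls below --
        // adjust these to match your actual services:
        //   POST /stt/transcribe  multipart field "audio_file" -> { "transcript": "..." }
        //   POST /chat            JSON { "prompt": "..." }     -> { "text": "..." }
        //   POST /speech          JSON { "text": "..." }       -> raw audio bytes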
        
        // --- DOM Elements ---
        const micButton = document.getElementById('mic-button');
        const micIcon = document.getElementById('mic-icon');
        const chatHistory = document.getElementById('chat-history');
        const statusElement = document.getElementById('status');

        // --- State Variables ---
        let mediaRecorder;      // active MediaRecorder for the current take
        let audioChunks = [];   // buffered dataavailable blobs for one recording
        let isRecording = false;

        // --- Helper Functions for UI ---
        function addMessage(text, isUser = true) {
            const messageDiv = document.createElement('div');
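            // textContent escapes markup, so transcripts can't inject HTML.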
            messageDiv.textContent = text;
            messageDiv.className = `message-bubble ${isUser ? 'bg-indigo-500 text-white self-end' : 'bg-gray-300 text-gray-800 self-start'}`;
            chatHistory.appendChild(messageDiv);
            chatHistory.scrollTop = chatHistory.scrollHeight; // Auto-scroll to the latest message
        }

        function setStatus(message) {
            statusElement.textContent = message;
        }

        function toggleRecordingUI(recording) {
            isRecording = recording;
            micIcon.innerHTML = recording 
                ? '<svg class="w-8 h-8 animate-pulse" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg"><path d="M10 18a8 8 0 100-16 8 8 0 000 16zM8 9a1 1 0 011-1h2a1 1 0 110 2H9a1 1 0 01-1-1zm0 4a1 1 0 011-1h2a1 1 0 110 2H9a1 1 0 01-1-1z"></path></svg>'
                : '<svg class="w-8 h-8" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg"><path fill-rule="evenodd" d="M10 9a3 3 0 100-6 3 3 0 000 6zm-7 9a7 7 0 1114 0H3z" clip-rule="evenodd"></path></svg>';
            micButton.classList.toggle('bg-red-500', recording);
            micButton.classList.toggle('hover:bg-red-600', recording);
            micButton.classList.toggle('bg-indigo-600', !recording);
            micButton.classList.toggle('hover:bg-indigo-700', !recording);
            // Keep the accessible name in sync with the button's current action.
            micButton.setAttribute('aria-label', recording ? 'Stop recording' : 'Start recording');
        }

        // --- Main Chat Logic ---
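        // One utterance per round trip: record in the browser, then chain
        // STT -> LLM -> TTS over HTTP and play back the synthesized reply.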
        async function startRecording() {
            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);
                audioChunks = [];

                mediaRecorder.ondataavailable = event => {
                    audioChunks.push(event.data);
                };

                mediaRecorder.onstop = async () => {
                    // Release the microphone so the browser's recording indicator clears.
                    stream.getTracks().forEach(track => track.stop());
                    toggleRecordingUI(false);
                    setStatus("Processing...");
                    // Most browsers record WebM/Opus, not WAV, so label the blob
                    // with the recorder's actual MIME type.
                    const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType || 'audio/webm' });
                    processConversation(audioBlob);
                };

                // Handlers are attached before starting so no chunks are missed.
                mediaRecorder.start();
                toggleRecordingUI(true);
                setStatus("Recording... Click to stop.");
            } catch (err) {
                console.error('Error accessing microphone:', err);
                setStatus("Error: Cannot access microphone.");
                alert('Please allow microphone access to use this feature.');
            }
        }

        async function processConversation(audioBlob) {
            try {
                // Step 1: Speech-to-Text (STT)
                const formData = new FormData();
                // Name the upload to match the recorded container (WebM in most browsers).
                formData.append('audio_file', audioBlob, 'audio.webm');

                const sttResponse = await fetch(STT_ENDPOINT, {
                    method: 'POST',
                    body: formData
                });
                if (!sttResponse.ok) throw new Error(`STT request failed (HTTP ${sttResponse.status})`);
                const sttResult = await sttResponse.json();
                const userText = sttResult.transcript;
                if (!userText || !userText.trim()) {
                    setStatus("No speech detected. Click the microphone to try again.");
                    return;
                }
                addMessage(userText, true);

                // Step 2: Text-to-Text (LLM)
                // This is a placeholder for your actual LLM chat endpoint
                const llmResponse = await fetch(LLM_ENDPOINT, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ prompt: userText })
                });
                if (!llmResponse.ok) throw new Error(`LLM request failed (HTTP ${llmResponse.status})`);
                const llmResult = await llmResponse.json();
                const aiText = llmResult.text;
                addMessage(aiText, false);

                // Step 3: Text-to-Speech (TTS)
                const ttsResponse = await fetch(TTS_ENDPOINT, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ text: aiText })
                });
                if (!ttsResponse.ok) throw new Error(`TTS request failed (HTTP ${ttsResponse.status})`);
                const audioData = await ttsResponse.arrayBuffer();
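                // Playback below assumes these bytes are in a container/codec
                // decodeAudioData understands (e.g. WAV, MP3, or OGG).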

                // Step 4: Play the audio
                await playAudio(audioData);
                setStatus("Click the microphone to start recording.");
                
            } catch (error) {
                console.error("Conversation processing failed:", error);
                setStatus(`Error: ${error.message}`);
                alert(`An error occurred: ${error.message}`);
            }
        }

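        // Decode raw audio bytes with the Web Audio API and resolve only after
        // playback finishes, so callers can await the full clip.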
        async function playAudio(audioData) {
            const audioContext = new (window.AudioContext || window.webkitAudioContext)();
            const audioBuffer = await audioContext.decodeAudioData(audioData);
            const source = audioContext.createBufferSource();
            source.buffer = audioBuffer;
            source.connect(audioContext.destination);
            // Resolve when playback ends so `await playAudio(...)` waits for the clip.
            return new Promise(resolve => {
                source.onended = resolve;
                source.start();
            });
        }

        // --- Event Listener ---
        micButton.addEventListener('click', () => {
            if (isRecording) {
                mediaRecorder.stop();
            } else {
                startRecording();
            }
        });

        // Add a default welcome message
        document.addEventListener('DOMContentLoaded', () => {
            addMessage("Hello! I'm an AI assistant. How can I help you today?", false);
        });
    </script>
</body>
</html>