<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Voice Chat</title>
<!-- Use Tailwind CSS for modern styling -->
<script src="https://cdn.tailwindcss.com"></script>
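<!-- The Play CDN is convenient for prototyping; a compiled Tailwind build is recommended for production. -->
<!-- Load the Inter font referenced in the styles below -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap" rel="stylesheet">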
<style>
body {
font-family: 'Inter', sans-serif;
background-color: #f3f4f6;
}
.chat-container {
/* The parent flex column (h-[90vh]) controls the height; min-height: 0
lets this pane shrink below its content so overflow-y can scroll. */
min-height: 0;
overflow-y: auto;
}
/* Custom scrollbar for a cleaner look */
.chat-container::-webkit-scrollbar {
width: 8px;
}
.chat-container::-webkit-scrollbar-track {
background: #e5e7eb;
}
.chat-container::-webkit-scrollbar-thumb {
background-color: #9ca3af;
border-radius: 4px;
}
.message-bubble {
max-width: 80%;
padding: 10px 15px;
border-radius: 20px;
/* Vertical spacing between bubbles comes from the container's space-y-4 utility. */
overflow-wrap: break-word;
}
</style>
</head>
<body class="flex items-center justify-center min-h-screen">
<div class="bg-white p-8 rounded-2xl shadow-xl w-full max-w-lg mx-4 flex flex-col h-[90vh]">
<h1 class="text-3xl font-bold text-center mb-6 text-gray-800">AI Voice Chat</h1>
<!-- Chat History Window -->
<div id="chat-history" class="chat-container bg-gray-100 p-4 rounded-xl flex-grow mb-6 space-y-4">
<!-- Messages will be injected here -->
</div>
<!-- Controls Section -->
<div class="flex flex-col items-center space-y-4">
<div id="status" class="text-center text-sm font-medium text-gray-500 min-h-[20px]">
Click the microphone to start recording.
</div>
<button id="mic-button"
class="w-20 h-20 bg-indigo-600 rounded-full flex items-center justify-center
text-white text-4xl shadow-lg transition-all duration-300 ease-in-out
hover:bg-indigo-700 active:scale-95"
aria-label="Start recording">
<svg id="mic-icon" class="w-8 h-8" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" d="M10 9a3 3 0 100-6 3 3 0 000 6zm-7 9a7 7 0 1114 0H3z" clip-rule="evenodd"></path>
</svg>
</button>
</div>
</div>
<script>
// --- Configuration ---
const STT_ENDPOINT = "/stt/transcribe";
const LLM_ENDPOINT = "/chat"; // Replace with your actual LLM chat endpoint
const TTS_ENDPOINT = "/speech";
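// Assumed request/response contracts (they match the code below; adjust to your backend):
//   POST STT_ENDPOINT: multipart form, field "audio_file" -> { "transcript": "..." }
//   POST LLM_ENDPOINT: JSON { "prompt": "..." }           -> { "text": "..." }
//   POST TTS_ENDPOINT: JSON { "text": "..." }             -> raw audio bytes decodable by Web Audio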
// --- DOM Elements ---
const micButton = document.getElementById('mic-button');
const micIcon = document.getElementById('mic-icon');
const chatHistory = document.getElementById('chat-history');
const statusElement = document.getElementById('status');
// --- State Variables ---
let mediaRecorder;
let audioChunks = [];
let isRecording = false;
// --- Helper Functions for UI ---
function addMessage(text, isUser = true) {
const messageDiv = document.createElement('div');
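// textContent (not innerHTML) keeps transcripts from injecting markup.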
messageDiv.textContent = text;
messageDiv.className = `message-bubble ${isUser ? 'bg-indigo-500 text-white self-end' : 'bg-gray-300 text-gray-800 self-start'}`;
chatHistory.appendChild(messageDiv);
chatHistory.scrollTop = chatHistory.scrollHeight; // Auto-scroll to the latest message
}
function setStatus(message) {
statusElement.textContent = message;
}
function toggleRecordingUI(recording) {
isRecording = recording;
// Swap only the <path> content so a second <svg> isn't nested inside #mic-icon.
micIcon.innerHTML = recording
? '<path d="M10 18a8 8 0 100-16 8 8 0 000 16zM8 9a1 1 0 011-1h2a1 1 0 110 2H9a1 1 0 01-1-1zm0 4a1 1 0 011-1h2a1 1 0 110 2H9a1 1 0 01-1-1z"></path>'
: '<path fill-rule="evenodd" d="M10 9a3 3 0 100-6 3 3 0 000 6zm-7 9a7 7 0 1114 0H3z" clip-rule="evenodd"></path>';
micIcon.classList.toggle('animate-pulse', recording);
micButton.setAttribute('aria-label', recording ? 'Stop recording' : 'Start recording');
micButton.classList.toggle('bg-red-500', recording);
micButton.classList.toggle('hover:bg-red-600', recording);
micButton.classList.toggle('bg-indigo-600', !recording);
micButton.classList.toggle('hover:bg-indigo-700', !recording);
}
// --- Main Chat Logic ---
async function startRecording() {
try {
// Prompts for mic permission; requires a secure context (HTTPS or localhost).
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorder = new MediaRecorder(stream);
audioChunks = [];
// Register handlers before starting so no data is missed.
mediaRecorder.ondataavailable = event => {
audioChunks.push(event.data);
};
mediaRecorder.onstop = async () => {
// Release the microphone so the browser's recording indicator turns off.
stream.getTracks().forEach(track => track.stop());
toggleRecordingUI(false);
setStatus("Processing...");
// MediaRecorder emits the browser's native container (typically WebM or
// Ogg with Opus), not WAV, so label the blob with its real MIME type.
const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType || 'audio/webm' });
processConversation(audioBlob);
};
mediaRecorder.start();
toggleRecordingUI(true);
setStatus("Recording... Click to stop.");
} catch (err) {
console.error('Error accessing microphone:', err);
setStatus("Error: Cannot access microphone.");
alert('Please allow microphone access to use this feature.');
}
}
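// Optional sketch: request an explicit recording format when the browser
// supports it (assumption: the STT backend prefers WebM/Opus). To use it,
// pass the result as the second argument to the MediaRecorder constructor above.
function pickRecorderOptions() {
const preferred = 'audio/webm;codecs=opus';
return window.MediaRecorder && MediaRecorder.isTypeSupported(preferred)
? { mimeType: preferred }
: {}; // fall back to the browser's default
}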
async function processConversation(audioBlob) {
try {
// Step 1: Speech-to-Text (STT)
const formData = new FormData();
// Derive the filename extension from the blob's actual container type.
const ext = (audioBlob.type.split('/')[1] || 'webm').split(';')[0];
formData.append('audio_file', audioBlob, `recording.${ext}`);
const sttResponse = await fetch(STT_ENDPOINT, {
method: 'POST',
body: formData
});
if (!sttResponse.ok) throw new Error(`STT request failed (HTTP ${sttResponse.status})`);
const sttResult = await sttResponse.json();
const userText = sttResult.transcript;
addMessage(userText, true);
// Step 2: Text-to-Text (LLM)
// This is a placeholder for your actual LLM chat endpoint
const llmResponse = await fetch(LLM_ENDPOINT, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ prompt: userText })
});
if (!llmResponse.ok) throw new Error(`LLM request failed (HTTP ${llmResponse.status})`);
const llmResult = await llmResponse.json();
const aiText = llmResult.text;
addMessage(aiText, false);
// Step 3: Text-to-Speech (TTS)
const ttsResponse = await fetch(TTS_ENDPOINT, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text: aiText })
});
if (!ttsResponse.ok) throw new Error(`TTS request failed (HTTP ${ttsResponse.status})`);
const audioData = await ttsResponse.arrayBuffer();
// Step 4: Play the audio
await playAudio(audioData);
setStatus("Click the microphone to start recording.");
} catch (error) {
console.error("Conversation processing failed:", error);
setStatus(`Error: ${error.message}`);
alert(`An error occurred: ${error.message}`);
}
}
async function playAudio(audioData) {
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
// Contexts can start in a suspended state until a user gesture; resume defensively.
if (audioContext.state === 'suspended') await audioContext.resume();
const audioBuffer = await audioContext.decodeAudioData(audioData);
const source = audioContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(audioContext.destination);
// Resolve only when playback ends so callers can await the full clip,
// then close the context to free its audio resources.
return new Promise(resolve => {
source.onended = () => audioContext.close().then(resolve);
source.start();
});
}
// --- Event Listener ---
micButton.addEventListener('click', () => {
if (isRecording) {
mediaRecorder.stop();
} else {
startRecording();
}
});
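// Guard against unsupported environments: getUserMedia needs a secure
// context (HTTPS or localhost), and MediaRecorder is absent in some browsers.
if (!navigator.mediaDevices || !window.MediaRecorder) {
micButton.disabled = true;
setStatus("Error: Audio recording is not supported in this browser.");
}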
// Add a default welcome message
document.addEventListener('DOMContentLoaded', () => {
addMessage("Hello! I'm an AI assistant. How can I help you today?", false);
});
</script>
</body>
</html>