// src/services/apiService.js // This file handles all communication with your API endpoints. // It is designed to be stateless and does not use any React hooks. import { convertPcmToFloat32 } from "./audioUtils"; // Please replace with your actual endpoints const STT_ENDPOINT = "http://localhost:8001/stt/transcribe"; const SESSIONS_CREATE_ENDPOINT = "http://localhost:8001/sessions"; const SESSIONS_CHAT_ENDPOINT = (id) => `http://localhost:8001/sessions/${id}/chat`; const TTS_ENDPOINT = "http://localhost:8001/speech"; /** * Creates a new chat session with a unique user ID. * @returns {Promise<Object>} The session object from the API response. */ export const createSession = async () => { const generatedUserId = crypto.randomUUID(); const response = await fetch(SESSIONS_CREATE_ENDPOINT, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ user_id: generatedUserId }), }); if (!response.ok) { throw new Error(`Failed to create session. Status: ${response.status}`); } return await response.json(); }; /** * Sends an audio blob to the STT endpoint for transcription. * @param {Blob} audioBlob - The recorded audio data. * @returns {Promise<string>} The transcribed text. */ export const transcribeAudio = async (audioBlob) => { const formData = new FormData(); formData.append("audio_file", audioBlob, "audio.wav"); const response = await fetch(STT_ENDPOINT, { method: "POST", body: formData, }); if (!response.ok) { throw new Error("STT API failed"); } const result = await response.json(); return result.transcript; }; /** * Sends a text prompt to the LLM endpoint and gets a text response. * @param {string} sessionId - The current chat session ID. * @param {string} prompt - The user's text prompt. * @returns {Promise<string>} The AI's text response. */ export const chatWithAI = async (sessionId, prompt) => { const response = await fetch(SESSIONS_CHAT_ENDPOINT(sessionId), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ prompt: prompt, model: "gemini" }), }); if (!response.ok) { throw new Error("LLM API failed"); } const result = await response.json(); return result.answer; }; /** * Streams speech from the TTS endpoint and processes each chunk. * It uses a callback to pass the processed audio data back to the caller. * @param {string} text - The text to be synthesized. * @param {function(Float32Array): void} onData - Callback for each audio chunk. * @param {function(): void} onDone - Callback to execute when the stream is finished. * @returns {Promise<void>} */ export const streamSpeech = async (text, onData, onDone) => { try { const url = `${TTS_ENDPOINT}?stream=true&as_wav=false`; const response = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ text }), }); if (!response.ok) { throw new Error(`HTTP error! Status: ${response.status}`); } const reader = response.body.getReader(); let leftover = new Uint8Array(0); while (true) { const { done, value: chunk } = await reader.read(); if (done) { if (leftover.length > 0) { console.warn("Leftover bytes discarded:", leftover.length); } break; } let combined = new Uint8Array(leftover.length + chunk.length); combined.set(leftover); combined.set(chunk, leftover.length); let length = combined.length; if (length % 2 !== 0) { length -= 1; } const toConvert = combined.slice(0, length); leftover = combined.slice(length); const float32Raw = convertPcmToFloat32(toConvert); // Pass the raw float32 data to the caller for resampling onData(float32Raw); } } catch (error) { console.error("Failed to stream speech:", error); throw error; } finally { // We call the onDone callback to let the hook know the stream has ended. onDone(); } };