// src/services/apiService.js
// This file handles all communication with your API endpoints.
// It is designed to be stateless and does not use any React hooks.
import { convertPcmToFloat32 } from "./audioUtils";
// Please replace with your actual endpoints
// Single base URL so pointing at a different deployment is a one-line edit.
const API_BASE = "http://localhost:8001";
const STT_ENDPOINT = `${API_BASE}/stt/transcribe`;
const SESSIONS_CREATE_ENDPOINT = `${API_BASE}/sessions`;
const SESSIONS_CHAT_ENDPOINT = (id) => `${API_BASE}/sessions/${id}/chat`;
const TTS_ENDPOINT = `${API_BASE}/speech`;
/**
* Creates a new chat session with a unique user ID.
* @returns {Promise<Object>} The session object from the API response.
*/
/**
 * Opens a new chat session tied to a freshly generated user ID.
 * @returns {Promise<Object>} Parsed JSON session payload from the API.
 * @throws {Error} When the server responds with a non-2xx status.
 */
export const createSession = async () => {
  const payload = { user_id: crypto.randomUUID() };
  const response = await fetch(SESSIONS_CREATE_ENDPOINT, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    throw new Error(`Failed to create session. Status: ${response.status}`);
  }
  return response.json();
};
/**
* Sends an audio blob to the STT endpoint for transcription.
* @param {Blob} audioBlob - The recorded audio data.
* @returns {Promise<string>} The transcribed text.
*/
/**
 * Uploads recorded audio to the speech-to-text endpoint.
 * @param {Blob} audioBlob - The recorded audio data.
 * @returns {Promise<string>} The transcribed text.
 * @throws {Error} When the STT request fails.
 */
export const transcribeAudio = async (audioBlob) => {
  const body = new FormData();
  body.append("audio_file", audioBlob, "audio.wav");
  const response = await fetch(STT_ENDPOINT, { method: "POST", body });
  if (!response.ok) {
    throw new Error("STT API failed");
  }
  const { transcript } = await response.json();
  return transcript;
};
/**
* Sends a text prompt to the LLM endpoint and gets a text response.
* @param {string} sessionId - The current chat session ID.
* @param {string} prompt - The user's text prompt.
* @returns {Promise<string>} The AI's text response.
*/
/**
 * Sends a text prompt into an existing chat session and returns the reply.
 * @param {string} sessionId - The current chat session ID.
 * @param {string} prompt - The user's text prompt.
 * @returns {Promise<string>} The AI's text response.
 * @throws {Error} When the chat request fails.
 */
export const chatWithAI = async (sessionId, prompt) => {
  const response = await fetch(SESSIONS_CHAT_ENDPOINT(sessionId), {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ prompt, provider_name: "gemini" }),
  });
  if (!response.ok) {
    throw new Error("LLM API failed");
  }
  const { answer } = await response.json();
  return answer;
};
/**
* Streams speech from the TTS endpoint and processes each chunk.
* It uses a callback to pass the processed audio data back to the caller.
* @param {string} text - The text to be synthesized.
* @param {function(Float32Array): void} onData - Callback for each audio chunk.
* @param {function(): void} onDone - Callback to execute when the stream is finished.
* @returns {Promise<void>}
*/
/**
 * Streams synthesized speech for `text` from the TTS endpoint as raw PCM.
 * Complete 16-bit sample runs are converted to Float32 and handed to
 * `onData`; `onDone` fires exactly once when the stream ends, whether it
 * finished normally or threw.
 * @param {string} text - The text to be synthesized.
 * @param {function(Float32Array): void} onData - Callback for each audio chunk.
 * @param {function(): void} onDone - Callback to execute when the stream is finished.
 * @returns {Promise<void>}
 * @throws {Error} On a non-2xx response or when the response has no readable body.
 */
export const streamSpeech = async (text, onData, onDone) => {
  try {
    const url = `${TTS_ENDPOINT}?stream=true&as_wav=false`;
    const response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text }),
    });
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }
    // Fetch allows body to be null (e.g. opaque/error responses); fail with a
    // clear message instead of a TypeError on getReader().
    if (!response.body) {
      throw new Error("TTS response has no readable body");
    }
    const reader = response.body.getReader();
    // Bytes carried over between reads so a 16-bit sample split across two
    // network chunks is never converted in halves.
    let leftover = new Uint8Array(0);
    while (true) {
      const { done, value: chunk } = await reader.read();
      if (done) {
        if (leftover.length > 0) {
          console.warn("Leftover bytes discarded:", leftover.length);
        }
        break;
      }
      // Stitch the carried-over byte(s) onto the front of the new chunk.
      const combined = new Uint8Array(leftover.length + chunk.length);
      combined.set(leftover);
      combined.set(chunk, leftover.length);
      // Keep only whole 16-bit samples; the odd trailing byte (if any) waits
      // for the next read.
      const evenLength = combined.length - (combined.length % 2);
      leftover = combined.slice(evenLength);
      if (evenLength === 0) {
        // Not even one full sample accumulated yet — skip the callback rather
        // than invoking onData with an empty buffer.
        continue;
      }
      // slice() copies to a fresh buffer at offset 0, keeping any Int16 view
      // inside convertPcmToFloat32 properly aligned.
      const float32Raw = convertPcmToFloat32(combined.slice(0, evenLength));
      // Pass the raw float32 data to the caller for resampling
      onData(float32Raw);
    }
  } catch (error) {
    console.error("Failed to stream speech:", error);
    throw error;
  } finally {
    // We call the onDone callback to let the hook know the stream has ended.
    onDone();
  }
};