cortex-hub/ui/tts-client-app/src/services/apiService.js at 200a04500087f1ac45c703a1e6cf394bfcf405d8

Fork: 0
yangyangxie / cortex-hub
Find file
Newer
Older
cortex-hub / ui / tts-client-app / src / services / apiService.js
yangyangxie 6 days ago 3 KB reactor ui project
Raw Blame History
// src/services/apiService.js

// This file handles all communication with your API endpoints.
// It is designed to be stateless and does not use any React hooks.

import { convertPcmToFloat32 } from "./audioUtils";

// Origin of the backend API. Replace with your actual deployment; every
// endpoint below is derived from it so it only has to change in one place.
const API_BASE_URL = "http://localhost:8001";

// Speech-to-text transcription endpoint (POST).
const STT_ENDPOINT = `${API_BASE_URL}/stt/transcribe`;

// Session creation endpoint (POST).
const SESSIONS_CREATE_ENDPOINT = `${API_BASE_URL}/sessions`;

// Builds the chat endpoint for one session. The id is percent-encoded so that
// characters such as "/", "?" or "#" cannot alter the path being requested.
const SESSIONS_CHAT_ENDPOINT = (id) =>
  `${SESSIONS_CREATE_ENDPOINT}/${encodeURIComponent(id)}/chat`;

// Text-to-speech endpoint (POST).
const TTS_ENDPOINT = `${API_BASE_URL}/speech`;

/**
 * Opens a new chat session on the backend.
 *
 * A fresh random UUID is generated on every call and sent as `user_id` in a
 * JSON POST body to the session-creation endpoint.
 *
 * @returns {Promise<Object>} The parsed JSON body of the API response.
 * @throws {Error} If the response status is not OK; the message carries the status code.
 */
export const createSession = async () => {
  const payload = { user_id: crypto.randomUUID() };
  const requestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  };

  const res = await fetch(SESSIONS_CREATE_ENDPOINT, requestInit);
  if (res.ok) {
    const session = await res.json();
    return session;
  }

  throw new Error(`Failed to create session. Status: ${res.status}`);
};

/**
 * Sends an audio blob to the STT endpoint for transcription.
 *
 * The blob is uploaded as multipart form data under the field name
 * `audio_file` with the file name `audio.wav`.
 *
 * @param {Blob} audioBlob - The recorded audio data.
 * @returns {Promise<string>} The `transcript` field of the JSON response.
 * @throws {Error} If the response status is not OK; the message carries the status code.
 */
export const transcribeAudio = async (audioBlob) => {
  const formData = new FormData();
  formData.append("audio_file", audioBlob, "audio.wav");

  // No Content-Type header is set on purpose: fetch derives the multipart
  // boundary from the FormData body.
  const response = await fetch(STT_ENDPOINT, {
    method: "POST",
    body: formData,
  });
  if (!response.ok) {
    // Include the status, as the other API helpers in this file do, so a
    // failure can be diagnosed from the error alone.
    throw new Error(`STT API failed. Status: ${response.status}`);
  }

  const result = await response.json();
  return result.transcript;
};

/**
 * Sends a text prompt to the chat endpoint of a session and returns the reply.
 *
 * The request is a JSON POST carrying the prompt and the fixed model name
 * `"gemini"`.
 *
 * @param {string} sessionId - The current chat session ID.
 * @param {string} prompt - The user's text prompt.
 * @returns {Promise<string>} The `answer` field of the JSON response.
 * @throws {Error} If the response status is not OK; the message carries the status code.
 */
export const chatWithAI = async (sessionId, prompt) => {
  const response = await fetch(SESSIONS_CHAT_ENDPOINT(sessionId), {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ prompt, model: "gemini" }),
  });
  if (!response.ok) {
    // Include the status, as the other API helpers in this file do, so a
    // failure can be diagnosed from the error alone.
    throw new Error(`LLM API failed. Status: ${response.status}`);
  }

  const result = await response.json();
  return result.answer;
};

/**
 * Streams synthesized speech from the TTS endpoint and forwards it chunk by chunk.
 *
 * The response body is read as raw bytes. Each network chunk is appended to any
 * byte held back from the previous chunk, trimmed to an even byte count, and
 * handed to `convertPcmToFloat32`; the converted data is passed to `onData`.
 * An odd trailing byte is carried over to the next chunk, and a byte that is
 * still pending when the stream ends is discarded with a warning.
 *
 * @param {string} text - The text to be synthesized.
 * @param {function(Float32Array): void} onData - Callback for each audio chunk.
 * @param {function(): void} [onDone] - Invoked once when streaming ends, whether
 *   it succeeded or failed. Skipped if it is not a function.
 * @returns {Promise<void>}
 * @throws {Error} If the request fails, the status is not OK, the response has
 *   no body, or reading/converting/delivering a chunk throws. The error is
 *   logged and rethrown unchanged.
 */
export const streamSpeech = async (text, onData, onDone) => {
  // Kept outside the try block so the catch handler can tear the stream down.
  let reader = null;

  try {
    const url = `${TTS_ENDPOINT}?stream=true&as_wav=false`;

    const response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text }),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }
    if (!response.body) {
      // Fail with a clear message instead of a TypeError on `.getReader()`.
      throw new Error("TTS response has no body to stream");
    }

    reader = response.body.getReader();
    let leftover = new Uint8Array(0);

    while (true) {
      const { done, value: chunk } = await reader.read();
      if (done) {
        if (leftover.length > 0) {
          console.warn("Leftover bytes discarded:", leftover.length);
        }
        break;
      }

      // Prepend whatever was held back from the previous chunk.
      const combined = new Uint8Array(leftover.length + chunk.length);
      combined.set(leftover);
      combined.set(chunk, leftover.length);

      // Only convert whole 2-byte units (presumably 16-bit PCM samples —
      // confirm against convertPcmToFloat32); carry an odd byte forward.
      const evenLength = combined.length - (combined.length % 2);

      // `slice` (a copy) rather than `subarray`, so the converter receives an
      // array whose underlying buffer contains exactly these bytes.
      const toConvert = combined.slice(0, evenLength);
      leftover = combined.slice(evenLength);
      const float32Raw = convertPcmToFloat32(toConvert);

      // Pass the raw float32 data to the caller for resampling
      onData(float32Raw);
    }
  } catch (error) {
    console.error("Failed to stream speech:", error);
    if (reader) {
      // Stop the download and release the stream; otherwise a failure in the
      // middle of the loop leaves the response body locked and still reading.
      // Cancellation is best-effort and must not replace the original error.
      reader.cancel(error).catch(() => {});
    }
    throw error;
  } finally {
    // We call the onDone callback to let the hook know the stream has ended.
    // The guard keeps a missing callback from masking the error being thrown.
    if (typeof onDone === "function") {
      onDone();
    }
  }
};