diff --git a/ai-hub/app/api/routes/README.md b/ai-hub/app/api/routes/README.md index 5fb40c6..b330071 100644 --- a/ai-hub/app/api/routes/README.md +++ b/ai-hub/app/api/routes/README.md @@ -1,683 +1,243 @@ -# Invoking the Text-to-Speech (TTS) API Endpoint +# API Documentation -This guide explains how a frontend application can interact with the FastAPI `/speech` endpoint for text-to-speech conversion. The endpoint supports both **non-streaming** and **streaming** audio responses. +This repository provides frontend integration guides for the FastAPI backend's main endpoints: + +1. **Text-to-Speech (TTS) API** +2. **Speech-to-Text (STT) API** +3. **Chat Sessions API** +4. **Documents API** --- -## 1. Endpoint Details +## 1. Text-to-Speech (TTS) API -* **HTTP Method:** `POST` -* **Path:** `/speech` -* **Purpose:** Convert a given text string into audio. +This API converts text into audio, supporting both **non-streaming** and **streaming** modes. + +### 1.1 Endpoint Details + +| Method | Path | Purpose | +| ------ | --------- | ---------------------------------- | +| POST | `/speech` | Convert a given text string to audio | --- -## 2. Request Structure +### 1.2 Request Structure -### 2.1 Request Body +#### Request Body (JSON) -The POST request must include a JSON object matching the `SpeechRequest` schema. +| Field | Type | Description | Example | +| ------ | ------ | ---------------------------- | ---------------------------------------------- | +| text | string | Text to convert to speech | `"Hello, this is a test message."` | -| Field | Type | Description | Example | -| ----- | ------ | ------------------------------ | ---------------------------------- | -| text | string | Text to be converted to speech | `"Hello, this is a test message."` | - -**Example JSON body:** - +**Example:** ```json { "text": "The quick brown fox jumps over the lazy dog." } -``` +```` ---- +#### Query Parameters -### 2.2 Query Parameter - -| Parameter | Type | Default | Description | -| --------- | ------- | ------- | -------------------------------------------------------------------------------------- | -| stream | boolean | false | If `true`, returns a continuous audio stream. If `false`, returns the full audio file. | +| Parameter | Type | Default | Description | +| --------- | ------- | ------- | --------------------------------------------------------------------------- | +| stream | boolean | false | If true, returns continuous audio stream. | +| as\_wav | boolean | true | **Streaming only**: If true, returns WAV chunks; if false, returns raw PCM. | **Example URLs:** -* Non-streaming (Default): - - ``` - http://[your-api-server]/speech - ``` - -* Streaming: - - ``` - http://[your-api-server]/speech?stream=true - ``` +``` +Non-streaming: http://[your-api-server]/speech +Streaming WAV: http://[your-api-server]/speech?stream=true +Streaming PCM: http://[your-api-server]/speech?stream=true&as_wav=false +``` --- -## 3. Frontend Implementation (JavaScript) +### 1.3 Frontend Examples (JavaScript) -Below are two implementations using the `fetch` API. - ---- - -### Example 1: Non-Streaming Response - -Downloads the complete WAV file before playing. Suitable for short messages. 
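Before the JavaScript examples, the endpoint can also be exercised directly from a terminal. The sketch below uses `curl` against the placeholder server URL; the raw PCM variant is 16-bit mono audio at 24 kHz, matching the WAV parameters the backend applies elsewhere in this diff.

```bash
# Non-streaming: download the complete WAV file
curl -X POST "http://[your-api-server]/speech" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hello, this is a test message."}' \
  --output speech.wav

# Streaming WAV chunks (-N disables curl's output buffering)
curl -N -X POST "http://[your-api-server]/speech?stream=true" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hello, this is a test message."}' \
  --output speech_stream.wav

# Streaming raw PCM (16-bit mono, 24 kHz)
curl -N -X POST "http://[your-api-server]/speech?stream=true&as_wav=false" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hello, this is a test message."}' \
  --output speech_stream.pcm
```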
+#### Example 1: Non-Streaming ```javascript -// Generate and play non-streaming audio async function getSpeechAudio(text) { - const url = 'http://[your-api-server]/speech'; // Replace with your API URL - - try { - const response = await fetch(url, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text }) - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - const audioBlob = await response.blob(); - const audioUrl = URL.createObjectURL(audioBlob); - - const audio = new Audio(audioUrl); - audio.play(); - - console.log("Audio file received and is now playing."); - } catch (error) { - console.error("Failed to generate speech:", error); - } + const response = await fetch('http://[your-api-server]/speech', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text }) + }); + const audioBlob = await response.blob(); + const audioUrl = URL.createObjectURL(audioBlob); + new Audio(audioUrl).play(); } - -// Example: -// getSpeechAudio("This is an example of a non-streaming response."); ``` ---- - -### Example 2: Streaming Response - -Plays audio as it arrives using the **MediaSource API**. Ideal for long texts. +#### Example 2: Streaming WAV ```javascript -// Stream audio and play as it arrives async function streamSpeechAudio(text) { - const url = 'http://[your-api-server]/speech?stream=true'; // Replace with your API URL - - try { - const response = await fetch(url, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text }) - }); - - if (!response.ok || !response.body) { - throw new Error(`HTTP error! status: ${response.status}`); + const response = await fetch('http://[your-api-server]/speech?stream=true', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text }) + }); + const mediaSource = new MediaSource(); + const audio = new Audio(URL.createObjectURL(mediaSource)); + mediaSource.addEventListener('sourceopen', async () => { + const sourceBuffer = mediaSource.addSourceBuffer('audio/wav'); + const reader = response.body.getReader(); + while (true) { + const { done, value } = await reader.read(); + if (done) { mediaSource.endOfStream(); break; } + sourceBuffer.appendBuffer(value); } + }); + audio.play(); +} +``` - const mediaSource = new MediaSource(); - const audio = new Audio(); - audio.src = URL.createObjectURL(mediaSource); +#### Example 3: Streaming PCM (Web Audio API) - mediaSource.addEventListener('sourceopen', async () => { - const sourceBuffer = mediaSource.addSourceBuffer('audio/wav'); - const reader = response.body.getReader(); - - while (true) { - const { done, value } = await reader.read(); - if (done) { - mediaSource.endOfStream(); - break; - } - sourceBuffer.appendBuffer(value); - } - }); +```javascript +async function streamPcmAudio(text) { + const response = await fetch('http://[your-api-server]/speech?stream=true&as_wav=false', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text }) + }); + const audioContext = new AudioContext({ sampleRate: 24000 }); + const reader = response.body.getReader(); + let currentOffset = 0; - audio.play(); - console.log("Streaming audio is starting..."); - } catch (error) { - console.error("Failed to stream speech:", error); + function pcmToFloat32(pcm) { + const int16 = new Int16Array(pcm.buffer); + const float32 = new Float32Array(int16.length); + for (let i = 0; i < int16.length; 
i++) float32[i] = int16[i] / 32768.0; + return float32; + } + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + const data = pcmToFloat32(value); + const buffer = audioContext.createBuffer(1, data.length, audioContext.sampleRate); + buffer.copyToChannel(data, 0); + const source = audioContext.createBufferSource(); + source.buffer = buffer; + source.connect(audioContext.destination); + source.start(currentOffset); + currentOffset += buffer.duration; } } - -// Example: -// streamSpeechAudio("This is an example of a streaming response, which begins playing before the entire audio file is received."); ``` -# Invoking the Speech-to-Text (STT) API Endpoint +--- -This document explains how a frontend application can interact with the FastAPI `/stt/transcribe` endpoint to transcribe an uploaded audio file into text. +## 2. Speech-to-Text (STT) API + +Converts uploaded audio into text. + +### 2.1 Endpoint Details + +| Method | Path | Purpose | Content-Type | +| ------ | ----------------- | ---------------- | ------------------- | +| POST | `/stt/transcribe` | Transcribe audio | multipart/form-data | --- -## 1. Endpoint Details +### 2.2 Request Structure -* **HTTP Method:** `POST` -* **Path:** `/stt/transcribe` -* **Purpose:** Transcribe an uploaded audio file into text. -* **Content Type:** `multipart/form-data` +| Field | Type | Description | +| ----------- | ---- | ------------------------ | +| audio\_file | File | Audio file to transcribe | --- -## 2. Request Structure - -### 2.1 Request Body - -The POST request must include a `multipart/form-data` object with a single file field named `audio_file`. - -| Field | Type | Description | -| ----------- | ---- | -------------------------------- | -| audio\_file | File | The audio file to be transcribed | - ---- - -## 3. Frontend Implementation (JavaScript + HTML) - -Below is a complete working example using `fetch` to send the file and display the transcription result. +### 2.3 Example Frontend (HTML + JS) ```html - - - - - - STT API Example - - - -
-    Speech-to-Text (STT) Transcription
-    Transcribing...
-    Your transcribed text will appear here.
+<!-- Minimal markup sketch reconstructed by the editor; element IDs are illustrative -->
+<input type="file" id="audioFile" accept="audio/*">
+<button id="transcribeBtn">Transcribe</button>
+<div id="result">Your transcribed text will appear here.</div>
+<script>
+document.getElementById('transcribeBtn').addEventListener('click', async () => {
+  const formData = new FormData();
+  formData.append('audio_file', document.getElementById('audioFile').files[0]);
- - - + const res = await fetch('http://[your-api-server]/stt/transcribe', { method: 'POST', body: formData }); + const data = await res.json(); + document.getElementById('result').textContent = data.transcript; +}); + ``` -Here’s your Chat Sessions API documentation reformatted for clarity, structure, and consistency: +--- + +## 3. Chat Sessions API + +Manages conversational sessions with the AI. + +### 3.1 Endpoints + +| Method | Path | Purpose | +| ------ | --------------------------------- | --------------------------- | +| POST | `/sessions/` | Create a new chat session | +| POST | `/sessions/{session_id}/chat` | Send a message in a session | +| GET | `/sessions/{session_id}/messages` | Retrieve chat history | --- -# Invoking the Chat Sessions API Endpoint +## 4. Documents API -This document describes how a frontend application can interact with the FastAPI `/sessions` endpoints. These endpoints allow you to: +Add, list, and delete documents. -* Create new chat sessions -* Send messages within a session -* Retrieve chat history +### 4.1 Endpoints + +| Method | Path | Purpose | +| ------ | -------------------------- | ------------------ | +| POST | `/documents/` | Add a document | +| GET | `/documents/` | List all documents | +| DELETE | `/documents/{document_id}` | Delete a document | --- -## 1. Endpoint Details - -| HTTP Method | Path | Purpose | Request Type | -| ----------- | --------------------------------- | ------------------------------------------------------------- | ------------------ | -| **POST** | `/sessions/` | Creates a new chat session | `application/json` | -| **POST** | `/sessions/{session_id}/chat` | Sends a message and receives a response in a specific session | `application/json` | -| **GET** | `/sessions/{session_id}/messages` | Retrieves the message history for a given session | N/A | - ---- - -## 2. 
Request & Response Structures - -### 2.1 Create a New Chat Session - -**POST** `/sessions/` - -**Request Body:** - -| Field | Type | Description | -| -------- | ------ | ----------------------------------- | -| user\_id | string | ID of the user creating the session | -| model | string | Model to use for the session | - -**Example Request:** - -```json -{ - "user_id": "user-1234", - "model": "gemini" -} -``` - -**Response Body:** - -| Field | Type | Description | -| ----------- | ------- | -------------------------- | -| id | integer | Session ID | -| user\_id | string | User ID | -| created\_at | string | Session creation timestamp | -| model | string | Model used | - ---- - -### 2.2 Send a Message in a Session - -**POST** `/sessions/{session_id}/chat` - -**Path Parameter:** - -| Name | Type | Description | -| ----------- | ------- | ----------------- | -| session\_id | integer | Unique session ID | - -**Request Body:** - -| Field | Type | Description | -| ---------------------- | ------- | ----------------------------------------------------- | -| prompt | string | User message | -| model | string | Model for this message (can override session default) | -| load\_faiss\_retriever | boolean | Whether to use FAISS retriever | - -**Example Request:** - -```json -{ - "prompt": "What is the capital of France?", - "model": "gemini", - "load_faiss_retriever": false -} -``` - -**Response Body:** - -| Field | Type | Description | -| ----------- | ------ | --------------------------- | -| answer | string | Model's answer | -| model\_used | string | Model used for the response | - ---- - -### 2.3 Get Session Chat History - -**GET** `/sessions/{session_id}/messages` - -**Path Parameter:** - -| Name | Type | Description | -| ----------- | ------- | ----------------- | -| session\_id | integer | Unique session ID | - -**Response Body:** - -| Field | Type | Description | -| ----------- | ------- | -------------------------------------------------------- | -| session\_id | integer | Session ID | -| messages | array | List of message objects (`role`, `content`, `timestamp`) | - ---- - -## 3. Frontend Implementation (HTML + JavaScript) - -Below is a complete example that: - -1. Creates a new chat session -2. Sends a message in the session -3. Retrieves the chat history +### 4.2 Example Frontend (HTML + JS) ```html - - - - - - Chat Sessions API Example - - - -
-    Chat Sessions API Example
-    This page demonstrates creating a session, sending a message, and retrieving the history.
-    Workflow Log
+<!-- Minimal markup sketch reconstructed by the editor; element IDs match the script below -->
+<form id="addDoc">
+  <input id="title" placeholder="Title">
+  <input id="content" placeholder="Content">
+  <button type="submit">Add Document</button>
+</form>
+<div id="docs"></div>
+<script>
+const API = 'http://[your-api-server]'; // assumed base URL constant used by the script below
- - - -``` - -# **Invoking the Documents API Endpoint** - -This guide explains how a frontend application can interact with the FastAPI `/documents` endpoints. -These endpoints allow you to **add**, **list**, and **delete** documents. - ---- - -## **Endpoint Summary** - -| HTTP Method | Path | Purpose | Request Type | -| ----------- | -------------------------- | ---------------------------------- | ------------------ | -| **POST** | `/documents/` | Adds a new document. | `application/json` | -| **GET** | `/documents/` | Lists all documents. | N/A | -| **DELETE** | `/documents/{document_id}` | Deletes a specific document by ID. | N/A | - ---- - -## **Request & Response Structures** - -### **1. Add a New Document** - -**POST** `/documents/` - -**Request Body** (JSON): - -* `title` *(string)* – The title of the document. -* `content` *(string)* – The content of the document. - -**Example Request:** - -```json -{ - "title": "My First Document", - "content": "This is the content of my very first document." +async function fetchDocs() { + const res = await fetch(`${API}/documents/`); + const data = await res.json(); + document.getElementById('docs').innerHTML = data.documents.map(doc => + `
${doc.title}
` + ).join(''); } + +document.getElementById('addDoc').onsubmit = async e => { + e.preventDefault(); + const title = document.getElementById('title').value; + const content = document.getElementById('content').value; + await fetch(`${API}/documents/`, { + method: 'POST', headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ title, content }) + }); + fetchDocs(); +}; + +async function delDoc(id) { + await fetch(`${API}/documents/${id}`, { method: 'DELETE' }); + fetchDocs(); +} + +fetchDocs(); + ``` - -**Response Body**: - -* `message` *(string)* – Success message. - ---- - -### **2. List All Documents** - -**GET** `/documents/` - -**Request Body:** None. - -**Response Body**: - -* `documents` *(array)* – List of documents. Each object contains: - - * `id` *(integer)* - * `title` *(string)* - * `content` *(string)* - * `created_at` *(timestamp)* - ---- - -### **3. Delete a Document** - -**DELETE** `/documents/{document_id}` - -**Path Parameters:** - -* `document_id` *(integer)* – Unique ID of the document to be deleted. - -**Response Body**: - -* `message` *(string)* – Success message. -* `document_id` *(integer)* – ID of the deleted document. - ---- - -## **Frontend Implementation (JavaScript Example)** - -Below is a complete HTML + JavaScript example showing how to **add**, **list**, and **delete** documents using the API. - -```html - - - - - - Documents API Example - - - -
-    Documents API Example
-    Add a New Document
-    Documents List
-    Log
- - - - -``` \ No newline at end of file diff --git a/ai-hub/app/api/routes/tts.py b/ai-hub/app/api/routes/tts.py index 557924a..e305ee2 100644 --- a/ai-hub/app/api/routes/tts.py +++ b/ai-hub/app/api/routes/tts.py @@ -10,22 +10,33 @@ @router.post( "", summary="Generate speech from text", - response_description="Audio bytes in WAV format, either as a complete file or a stream.", + response_description="Audio bytes in WAV or PCM format, either as a complete file or a stream.", ) async def create_speech_response( request: schemas.SpeechRequest, stream: bool = Query( False, description="If true, returns a streamed audio response. Otherwise, returns a complete file." + ), + as_wav: bool = Query( + True, + description="If true, returns WAV format audio. If false, returns raw PCM audio data. Only applies when stream is true." ) ): try: if stream: + # Pass the new as_wav parameter to the streaming function audio_stream_generator: AsyncGenerator[bytes, None] = services.tts_service.create_speech_stream( - text=request.text + text=request.text, + as_wav=as_wav ) - return StreamingResponse(audio_stream_generator, media_type="audio/wav") + + # Dynamically set the media_type based on the as_wav flag + media_type = "audio/wav" if as_wav else "audio/pcm" + + return StreamingResponse(audio_stream_generator, media_type=media_type) else: + # The non-streaming function only returns WAV, so this part remains the same audio_bytes = await services.tts_service.create_speech_non_stream( text=request.text ) diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index 150a1ac..4afaa3f 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -83,6 +83,7 @@ tts_provider = get_tts_provider( provider_name=settings.TTS_PROVIDER, api_key=settings.TTS_API_KEY, + model_name = settings.TTS_MODEL_NAME, voice_name=settings.TTS_VOICE_NAME ) diff --git a/ai-hub/app/config.py b/ai-hub/app/config.py index 858c8ad..390c990 100644 --- a/ai-hub/app/config.py +++ b/ai-hub/app/config.py @@ -18,11 +18,12 @@ class TTSProvider(str, Enum): """An enum for supported Text-to-Speech (TTS) providers.""" GOOGLE_GEMINI = "google_gemini" + GCLOUD_TTS = "gcloud_tts" # NEW: Add Google Cloud TTS as a supported provider class STTProvider(str, Enum): """An enum for supported Speech-to-Text (STT) providers.""" GOOGLE_GEMINI = "google_gemini" - OPENAI = "openai" # NEW: Add OpenAI as a supported provider + OPENAI = "openai" class ApplicationSettings(BaseModel): project_name: str = "Cortex Hub" @@ -45,6 +46,7 @@ class TTSProviderSettings(BaseModel): provider: TTSProvider = Field(default=TTSProvider.GOOGLE_GEMINI) + # The default values are kept as originally requested voice_name: str = "Kore" model_name: str = "gemini-2.5-flash-preview-tts" api_key: Optional[SecretStr] = None @@ -53,9 +55,6 @@ provider: STTProvider = Field(default=STTProvider.GOOGLE_GEMINI) model_name: str = "gemini-2.5-flash" api_key: Optional[SecretStr] = None - # NOTE: OpenAI provider requires a different model name (e.g., 'whisper-1') - # but we will handle this dynamically or through configuration. - # The BaseModel is for schema validation, not for provider-specific logic. 
class VectorStoreSettings(BaseModel): index_path: str = "data/faiss_index.bin" @@ -125,22 +124,22 @@ # --- API Keys & Models --- self.DEEPSEEK_API_KEY: Optional[str] = os.getenv("DEEPSEEK_API_KEY") self.GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY") - self.OPENAI_API_KEY: Optional[str] = os.getenv("OPENAI_API_KEY") # NEW: Add dedicated OpenAI API key + self.OPENAI_API_KEY: Optional[str] = os.getenv("OPENAI_API_KEY") self.DEEPSEEK_MODEL_NAME: str = os.getenv("DEEPSEEK_MODEL_NAME") or \ - get_from_yaml(["llm_providers", "deepseek_model_name"]) or \ - config_from_pydantic.llm_providers.deepseek_model_name + get_from_yaml(["llm_providers", "deepseek_model_name"]) or \ + config_from_pydantic.llm_providers.deepseek_model_name self.GEMINI_MODEL_NAME: str = os.getenv("GEMINI_MODEL_NAME") or \ - get_from_yaml(["llm_providers", "gemini_model_name"]) or \ - config_from_pydantic.llm_providers.gemini_model_name + get_from_yaml(["llm_providers", "gemini_model_name"]) or \ + config_from_pydantic.llm_providers.gemini_model_name # --- Vector Store Settings --- self.FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH") or \ - get_from_yaml(["vector_store", "index_path"]) or \ - config_from_pydantic.vector_store.index_path + get_from_yaml(["vector_store", "index_path"]) or \ + config_from_pydantic.vector_store.index_path dimension_str = os.getenv("EMBEDDING_DIMENSION") or \ - get_from_yaml(["vector_store", "embedding_dimension"]) or \ - config_from_pydantic.vector_store.embedding_dimension + get_from_yaml(["vector_store", "embedding_dimension"]) or \ + config_from_pydantic.vector_store.embedding_dimension self.EMBEDDING_DIMENSION: int = int(dimension_str) # --- Embedding Provider Settings --- @@ -149,15 +148,15 @@ embedding_provider_env = embedding_provider_env.lower() self.EMBEDDING_PROVIDER: EmbeddingProvider = EmbeddingProvider(embedding_provider_env or \ - get_from_yaml(["embedding_provider", "provider"]) or \ - config_from_pydantic.embedding_provider.provider) + get_from_yaml(["embedding_provider", "provider"]) or \ + config_from_pydantic.embedding_provider.provider) self.EMBEDDING_MODEL_NAME: str = os.getenv("EMBEDDING_MODEL_NAME") or \ - get_from_yaml(["embedding_provider", "model_name"]) or \ - config_from_pydantic.embedding_provider.model_name + get_from_yaml(["embedding_provider", "model_name"]) or \ + config_from_pydantic.embedding_provider.model_name self.EMBEDDING_API_KEY: Optional[str] = os.getenv("EMBEDDING_API_KEY") or \ - get_from_yaml(["embedding_provider", "api_key"]) or \ - self.GEMINI_API_KEY + get_from_yaml(["embedding_provider", "api_key"]) or \ + self.GEMINI_API_KEY # --- TTS Provider Settings --- tts_provider_env = os.getenv("TTS_PROVIDER") @@ -168,17 +167,23 @@ get_from_yaml(["tts_provider", "provider"]) or \ config_from_pydantic.tts_provider.provider) self.TTS_VOICE_NAME: str = os.getenv("TTS_VOICE_NAME") or \ - get_from_yaml(["tts_provider", "voice_name"]) or \ - config_from_pydantic.tts_provider.voice_name + get_from_yaml(["tts_provider", "voice_name"]) or \ + config_from_pydantic.tts_provider.voice_name + self.TTS_MODEL_NAME: str = os.getenv("TTS_MODEL_NAME") or \ - get_from_yaml(["tts_provider", "model_name"]) or \ - config_from_pydantic.tts_provider.model_name + get_from_yaml(["tts_provider", "model_name"]) or \ + config_from_pydantic.tts_provider.model_name + + # API Key logic for TTS + tts_api_key_env = os.getenv("TTS_API_KEY") or get_from_yaml(["tts_provider", "api_key"]) - self.TTS_API_KEY: Optional[str] = os.getenv("TTS_API_KEY") or \ - 
get_from_yaml(["tts_provider", "api_key"]) or \ - self.GEMINI_API_KEY + if tts_api_key_env: + self.TTS_API_KEY: Optional[str] = tts_api_key_env + else: + # If no specific TTS key is set, use the Gemini key as a fallback + self.TTS_API_KEY: Optional[str] = self.GEMINI_API_KEY - # --- NEW STT Provider Settings --- + # --- STT Provider Settings --- stt_provider_env = os.getenv("STT_PROVIDER") if stt_provider_env: stt_provider_env = stt_provider_env.lower() @@ -187,11 +192,10 @@ get_from_yaml(["stt_provider", "provider"]) or \ config_from_pydantic.stt_provider.provider) self.STT_MODEL_NAME: str = os.getenv("STT_MODEL_NAME") or \ - get_from_yaml(["stt_provider", "model_name"]) or \ - config_from_pydantic.stt_provider.model_name + get_from_yaml(["stt_provider", "model_name"]) or \ + config_from_pydantic.stt_provider.model_name # Logic for STT_API_KEY: Prioritize a dedicated STT_API_KEY. - # Fallback to OPENAI_API_KEY if the provider is OpenAI, otherwise use GEMINI_API_KEY. explicit_stt_api_key = os.getenv("STT_API_KEY") or get_from_yaml(["stt_provider", "api_key"]) if explicit_stt_api_key: diff --git a/ai-hub/app/config.yaml b/ai-hub/app/config.yaml index 99ec019..0f9028c 100644 --- a/ai-hub/app/config.yaml +++ b/ai-hub/app/config.yaml @@ -34,11 +34,12 @@ tts_provider: # The provider for the TTS service. - provider: "google_gemini" + # Check more at https://cloud.google.com/text-to-speech + provider: "gcloud_tts" # The name of the voice to use for TTS. - voice_name: "Zephyr" + voice_name: "en-US-Chirp3-HD-Achernar" # The model name for the TTS service. - model_name: "gemini-2.5-flash-preview-tts" + model_name: "gemini-2.5-pro-preview-tts" # The provider for the Speech-to-Text (STT) service. stt_provider: diff --git a/ai-hub/app/core/providers/factory.py b/ai-hub/app/core/providers/factory.py index 6cfbed0..725f871 100644 --- a/ai-hub/app/core/providers/factory.py +++ b/ai-hub/app/core/providers/factory.py @@ -3,6 +3,7 @@ from .llm.deepseek import DeepSeekProvider from .llm.gemini import GeminiProvider from .tts.gemini import GeminiTTSProvider +from .tts.gcloud_tts import GCloudTTSProvider from .stt.gemini import GoogleSTTProvider from openai import AsyncOpenAI @@ -24,10 +25,12 @@ raise ValueError(f"Unsupported model provider: '{model_name}'. Supported providers are: {list(_llm_providers.keys())}") return provider -def get_tts_provider(provider_name: str, api_key: str, voice_name: str) -> TTSProvider: +def get_tts_provider(provider_name: str, api_key: str, model_name: str, voice_name: str) -> TTSProvider: if provider_name == "google_gemini": - return GeminiTTSProvider(api_key=api_key, voice_name = voice_name) - raise ValueError(f"Unsupported TTS provider: '{provider_name}'. Supported providers are: ['google_gemini']") + return GeminiTTSProvider(api_key=api_key,model_name = model_name, voice_name = voice_name) + elif provider_name == "gcloud_tts": + return GCloudTTSProvider(api_key=api_key, voice_name = voice_name) + raise ValueError(f"Unsupported TTS provider: '{provider_name}'. 
Supported providers are: ['google_gemini', 'gcloud_tts']") def get_stt_provider(provider_name: str, api_key: str, model_name: str) -> STTProvider: if provider_name == "google_gemini": diff --git a/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_bearer.sh b/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_bearer.sh new file mode 100644 index 0000000..d753b84 --- /dev/null +++ b/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_bearer.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Bearer token (best to set this as an environment variable) +BEARER_TOKEN="" + +# Google TTS API endpoint +TTS_API_URL="https://texttospeech.googleapis.com/v1/text:synthesize" + +# Request payload +read -r -d '' PAYLOAD < output.mp3 + +echo "✅ MP3 file saved to: output.mp3" diff --git a/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_key.sh b/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_key.sh new file mode 100644 index 0000000..b62fc65 --- /dev/null +++ b/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_key.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Your Google Cloud API key +API_KEY="" + +# Google TTS API endpoint with API key as query param +TTS_API_URL="https://texttospeech.googleapis.com/v1/text:synthesize?key=$API_KEY" + +# Request payload +read -r -d '' PAYLOAD < output.mp3 + +echo "✅ MP3 file saved to: output.mp3" diff --git a/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_key_pcm.sh b/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_key_pcm.sh new file mode 100644 index 0000000..5149058 --- /dev/null +++ b/ai-hub/app/core/providers/tts/_debug/test_google_tts_script_key_pcm.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Your Google Cloud API key +API_KEY="" + +# Google TTS API endpoint with API key as query param +TTS_API_URL="https://texttospeech.googleapis.com/v1/text:synthesize?key=$API_KEY" + +# Request payload (set audioEncoding to LINEAR16 for PCM) +read -r -d '' PAYLOAD < output.pcm + +echo "✅ PCM audio saved to: output.pcm" diff --git a/ai-hub/app/core/providers/tts/gcloud_tts.py b/ai-hub/app/core/providers/tts/gcloud_tts.py new file mode 100644 index 0000000..bfdb1b1 --- /dev/null +++ b/ai-hub/app/core/providers/tts/gcloud_tts.py @@ -0,0 +1,86 @@ +import os +import aiohttp +import asyncio +import base64 +import logging +from typing import AsyncGenerator +from app.core.providers.base import TTSProvider +from aiohttp import ClientResponseError +from fastapi import HTTPException + +# Configure logging +logger = logging.getLogger(__name__) + +# New concrete class for the Google Cloud Text-to-Speech API +class GCloudTTSProvider(TTSProvider): + # This provider uses Google's dedicated TTS API. The voices are different from Gemini. + # Here is a small, representative list of available WaveNet voices. + # The full list is much larger and can be found in the official documentation. + AVAILABLE_VOICES = [ + "en-US-Wavenet-A", "en-US-Wavenet-B", "en-US-Wavenet-C", "en-US-Wavenet-D", + "en-US-Wavenet-E", "en-US-Wavenet-F", "en-US-Wavenet-G", "en-US-Wavenet-H" + ] + + def __init__(self, api_key: str, voice_name: str = "en-US-Wavenet-D"): + if voice_name not in self.AVAILABLE_VOICES: + raise ValueError(f"Invalid voice name: {voice_name}. 
Choose from {self.AVAILABLE_VOICES}") + + self.api_key = api_key + # The new API URL for the Cloud Text-to-Speech service + self.api_url = f"https://texttospeech.googleapis.com/v1/text:synthesize?key={self.api_key}" + self.voice_name = voice_name + logger.debug(f"Initialized GCloudTTSProvider with voice: {self.voice_name}") + + async def generate_speech(self, text: str) -> bytes: + logger.debug(f"Starting speech generation for text: '{text[:50]}...'") + + headers = { + "Content-Type": "application/json" + } + json_data = { + "input": { + "text": text + }, + "voice": { + "languageCode": "en-US", + "name": self.voice_name + }, + "audioConfig": { + "audioEncoding": "LINEAR16" + } + } + + logger.debug(f"API Request URL: {self.api_url}") + logger.debug(f"Request Payload: {json_data}") + + try: + async with aiohttp.ClientSession() as session: + async with session.post(self.api_url, headers=headers, json=json_data) as response: + logger.debug(f"Received API response with status code: {response.status}") + response.raise_for_status() + + response_json = await response.json() + logger.debug("Successfully parsed API response JSON.") + + # The audio data is now under the 'audioContent' key + audio_base64 = response_json.get('audioContent') + if not audio_base64: + raise KeyError("audioContent key not found in the response.") + + audio_bytes = base64.b64decode(audio_base64) + logger.debug(f"Decoded audio data, size: {len(audio_bytes)} bytes.") + + return audio_bytes + except ClientResponseError as e: + if e.status == 429: + logger.error("Rate limit exceeded on Cloud TTS API.") + raise HTTPException(status_code=429, detail="Rate limit exceeded. Please try again later.") + else: + logger.error(f"Aiohttp client error occurred: {e}") + raise HTTPException(status_code=500, detail=f"API request failed: {e}") + except KeyError as e: + logger.error(f"Key error in API response: {e}. Full response: {await response.json()}") + raise HTTPException(status_code=500, detail="Malformed API response from Cloud TTS.") + except Exception as e: + logger.error(f"An unexpected error occurred during speech generation: {e}") + raise HTTPException(status_code=500, detail=f"Failed to generate speech: {e}") \ No newline at end of file diff --git a/ai-hub/app/core/providers/tts/gemini.py b/ai-hub/app/core/providers/tts/gemini.py index 264557c..89d59e1 100644 --- a/ai-hub/app/core/providers/tts/gemini.py +++ b/ai-hub/app/core/providers/tts/gemini.py @@ -5,6 +5,9 @@ import logging from typing import AsyncGenerator from app.core.providers.base import TTSProvider +from aiohttp import ClientResponseError +from fastapi import HTTPException + # Configure logging logger = logging.getLogger(__name__) @@ -79,9 +82,13 @@ logger.debug(f"Decoded audio data, size: {len(audio_bytes)} bytes.") return audio_bytes - except aiohttp.ClientError as e: - logger.error(f"Aiohttp client error occurred: {e}") - raise HTTPException(status_code=500, detail=f"API request failed: {e}") + except ClientResponseError as e: + if e.status == 429: + logger.error("Rate limit exceeded on Gemini TTS API.") + raise HTTPException(status_code=429, detail="Rate limit exceeded. Please try again later.") + else: + logger.error(f"Aiohttp client error occurred: {e}") + raise HTTPException(status_code=500, detail=f"API request failed: {e}") except KeyError as e: logger.error(f"Key error in API response: {e}. 
Full response: {response_json}") raise HTTPException(status_code=500, detail="Malformed API response from Gemini.") diff --git a/ai-hub/app/core/services/tts.py b/ai-hub/app/core/services/tts.py index 63298e4..d658e6c 100644 --- a/ai-hub/app/core/services/tts.py +++ b/ai-hub/app/core/services/tts.py @@ -10,6 +10,19 @@ # --- Configure logging --- logger = logging.getLogger(__name__) +# --- Helper Functions --- +def _create_wav_file(pcm_data: bytes) -> bytes: + """ + Wraps raw 16-bit PCM audio data in a WAV header. + """ + with io.BytesIO() as wav_buffer: + with wave.open(wav_buffer, 'wb') as wav_file: + wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(24000) + wav_file.writeframes(pcm_data) + return wav_buffer.getvalue() + # --- Define TTS Service Class --- class TTSService: """ @@ -18,37 +31,25 @@ audio generation, splitting text into manageable chunks. """ - # Use an environment variable or a default value for the max chunk size - MAX_CHUNK_SIZE = int(os.getenv("TTS_MAX_CHUNK_SIZE", 200)) + MAX_CHUNK_SIZE = int(os.getenv("TTS_MAX_CHUNK_SIZE", 600)) def __init__(self, tts_provider: TTSProvider): - """ - Initializes the TTSService with a concrete TTS provider. - """ self.tts_provider = tts_provider async def _split_text_into_chunks(self, text: str) -> list[str]: - """ - Splits the input text into chunks based on a maximum size and - period delimiters, ensuring no chunk exceeds the limit. - """ chunks = [] current_chunk = "" - # Use a list of punctuation to split sentences more effectively separators = ['.', '?', '!', '\n'] sentences = [] - # Split text by multiple delimiters for separator in separators: text = text.replace(separator, f"{separator}|") sentences_with_empty = [s.strip() for s in text.split('|') if s.strip()] - # Re-join sentences with their delimiters, so we don't lose them for sentence in sentences_with_empty: sentences.append(sentence) for sentence in sentences: - # Add the sentence and check if it exceeds the chunk size. if len(current_chunk) + len(sentence) + 1 > self.MAX_CHUNK_SIZE and current_chunk: chunks.append(current_chunk.strip()) current_chunk = sentence + " " @@ -61,32 +62,14 @@ logger.debug(f"Split text into {len(chunks)} chunks.") return chunks - async def create_speech_stream(self, text: str) -> AsyncGenerator[bytes, None]: - """ - Generates a stream of complete, playable WAV files for each text chunk. - This provides a streaming-like experience even with a non-streaming backend - by sending each chunk as soon as it is generated. 
- """ + async def _generate_pcm_chunks(self, text: str) -> AsyncGenerator[bytes, None]: chunks = await self._split_text_into_chunks(text) - + for i, chunk in enumerate(chunks): - logger.info(f"Processing chunk {i+1}/{len(chunks)} for streaming...") - + logger.info(f"Generating PCM for chunk {i+1}/{len(chunks)}: '{chunk[:30]}...'") try: - # Get the raw PCM audio data for this chunk pcm_data = await self.tts_provider.generate_speech(chunk) - - # Wrap the PCM data in a WAV header to make it a playable file - with io.BytesIO() as wav_buffer: - with wave.open(wav_buffer, 'wb') as wav_file: - wav_file.setnchannels(1) - wav_file.setsampwidth(2) - wav_file.setframerate(24000) - wav_file.writeframes(pcm_data) - - # Yield a complete, playable WAV file for the chunk - yield wav_buffer.getvalue() - + yield pcm_data except Exception as e: logger.error(f"Error processing chunk {i+1}: {e}") raise HTTPException( @@ -94,22 +77,36 @@ detail=f"Error generating speech for chunk {i+1}: {e}" ) from e + async def create_speech_stream(self, text: str, as_wav: bool = True) -> AsyncGenerator[bytes, None]: + async for pcm_data in self._generate_pcm_chunks(text): + if as_wav: + yield _create_wav_file(pcm_data) + else: + yield pcm_data + async def create_speech_non_stream(self, text: str) -> bytes: - """ - Generates a complete audio file from the given text, splitting it - into chunks and concatenating the audio into a single WAV file. - All chunks are processed concurrently for speed. - """ chunks = await self._split_text_into_chunks(text) - - all_pcm_data = [] - - # Create a list of tasks for each chunk to run them concurrently. - tasks = [self.tts_provider.generate_speech(chunk) for chunk in chunks] - + semaphore = asyncio.Semaphore(3) # Limit concurrency to 3 requests + + async def generate_with_limit(chunk): + retries = 3 + delay = 1 + async with semaphore: + for attempt in range(retries): + try: + return await self.tts_provider.generate_speech(chunk) + except HTTPException as e: + if e.status_code == 429: + logger.warning(f"429 Too Many Requests for chunk, retrying in {delay}s (attempt {attempt+1}/{retries})...") + await asyncio.sleep(delay) + delay *= 2 # exponential backoff + else: + raise + raise HTTPException(status_code=429, detail="Too many requests after retries.") + + tasks = [generate_with_limit(chunk) for chunk in chunks] + try: - # Gather the results from all tasks. This will run all API calls - # to the TTS provider concurrently. 
all_pcm_data = await asyncio.gather(*tasks) logger.info(f"Successfully gathered audio data for all {len(chunks)} chunks.") except Exception as e: @@ -123,17 +120,7 @@ logger.warning("No audio data was generated.") raise HTTPException(status_code=500, detail="No audio data was generated from the TTS provider.") - # Concatenate all the raw PCM data into a single stream concatenated_pcm = b''.join(all_pcm_data) logger.info(f"Concatenated {len(chunks)} chunks into a single PCM stream.") - # Wrap the complete PCM stream in a single WAV container - with io.BytesIO() as wav_buffer: - with wave.open(wav_buffer, 'wb') as wav_file: - wav_file.setnchannels(1) - wav_file.setsampwidth(2) - # The Gemini API returns 24kHz audio, adjust if using a different provider - wav_file.setframerate(24000) - wav_file.writeframes(concatenated_pcm) - - return wav_buffer.getvalue() \ No newline at end of file + return _create_wav_file(concatenated_pcm) diff --git a/ai-hub/run_integration_tests.sh b/ai-hub/run_integration_tests.sh index 46489d2..e0f94d5 100644 --- a/ai-hub/run_integration_tests.sh +++ b/ai-hub/run_integration_tests.sh @@ -7,15 +7,15 @@ # You can define aliases for your test file paths here. TEST_SUITES=( "All tests" - "integration_tests/test_sessions.py" - "integration_tests/test_documents.py" - "integration_tests/test_misc.py" + "integration_tests/test_sessions_api.py" + "integration_tests/test_documents_api.py" + "integration_tests/test_misc_api.py" ) TEST_PATHS=( "integration_tests/" - "integration_tests/test_sessions.py" - "integration_tests/test_documents.py" - "integration_tests/test_misc.py" + "integration_tests/test_sessions_api.py" + "integration_tests/test_documents_api.py" + "integration_tests/test_misc_api.py" ) export DB_MODE=sqlite diff --git a/ai-hub/tests/api/routes/test_tts.py b/ai-hub/tests/api/routes/test_tts.py index cd4f14e..0eb2bbb 100644 --- a/ai-hub/tests/api/routes/test_tts.py +++ b/ai-hub/tests/api/routes/test_tts.py @@ -19,20 +19,20 @@ mock_services.tts_service.create_speech_non_stream.assert_called_once_with(text="Hello, this is a test") @pytest.mark.asyncio -async def test_create_speech_stream_response(async_client): - """Test the /speech endpoint with stream=true returns a streaming response.""" +async def test_create_speech_stream_wav_response(async_client): + """Test the /speech endpoint with stream=true and as_wav=true returns a streamed WAV response.""" test_client, mock_services = await anext(async_client) mock_audio_bytes_chunks = [b"chunk1", b"chunk2", b"chunk3"] - # This async generator mock correctly simulates the streaming service async def mock_async_generator(): for chunk in mock_audio_bytes_chunks: yield chunk - # We mock `create_speech_stream` with a MagicMock returning the async generator + # Mock `create_speech_stream` with a MagicMock returning the async generator mock_services.tts_service.create_speech_stream = MagicMock(return_value=mock_async_generator()) - response = await test_client.post("/speech?stream=true", json={"text": "Hello, this is a test"}) + # Explicitly set stream=true and as_wav=true + response = await test_client.post("/speech?stream=true&as_wav=true", json={"text": "Hello, this is a test"}) assert response.status_code == 200 assert response.headers["content-type"] == "audio/wav" @@ -43,4 +43,29 @@ streamed_content += chunk assert streamed_content == b"".join(mock_audio_bytes_chunks) - mock_services.tts_service.create_speech_stream.assert_called_once_with(text="Hello, this is a test") \ No newline at end of file + 
mock_services.tts_service.create_speech_stream.assert_called_once_with(text="Hello, this is a test", as_wav=True) + +@pytest.mark.asyncio +async def test_create_speech_stream_pcm_response(async_client): + """Test the /speech endpoint with stream=true and as_wav=false returns a streamed PCM response.""" + test_client, mock_services = await anext(async_client) + mock_audio_bytes_chunks = [b"pcm_chunk1", b"pcm_chunk2", b"pcm_chunk3"] + + async def mock_async_generator(): + for chunk in mock_audio_bytes_chunks: + yield chunk + + mock_services.tts_service.create_speech_stream = MagicMock(return_value=mock_async_generator()) + + # Set stream=true and as_wav=false + response = await test_client.post("/speech?stream=true&as_wav=false", json={"text": "Hello, this is a test"}) + + assert response.status_code == 200 + assert response.headers["content-type"] == "audio/pcm" + + streamed_content = b"" + async for chunk in response.aiter_bytes(): + streamed_content += chunk + + assert streamed_content == b"".join(mock_audio_bytes_chunks) + mock_services.tts_service.create_speech_stream.assert_called_once_with(text="Hello, this is a test", as_wav=False) \ No newline at end of file diff --git a/ai-hub/tests/core/providers/test_factory.py b/ai-hub/tests/core/providers/test_factory.py index 3d48c03..e00e71b 100644 --- a/ai-hub/tests/core/providers/test_factory.py +++ b/ai-hub/tests/core/providers/test_factory.py @@ -27,11 +27,11 @@ def test_get_tts_provider_returns_gemini_tts_provider(): """Tests that the factory returns a GeminiTTSProvider instance for 'google_gemini'.""" - # Use a valid voice from AVAILABLE_VOICES to avoid ValueError valid_voice = GeminiTTSProvider.AVAILABLE_VOICES[0] provider = get_tts_provider( "google_gemini", api_key="dummy_key", + model_name="dummy-model", voice_name=valid_voice ) assert isinstance(provider, GeminiTTSProvider) @@ -45,6 +45,7 @@ get_tts_provider( "unknown", api_key="dummy_key", + model_name="dummy-model", voice_name=valid_voice )
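
The rewritten README lists the Chat Sessions endpoints (section 3) but no longer ships a frontend example. A minimal sketch of the create, chat, and history workflow, reusing the request and response field names from the legacy docs removed in this diff (`user_id`, `model`, `id`, `prompt`, `load_faiss_retriever`, `answer`, `messages`), could look like this:

```javascript
const API = 'http://[your-api-server]'; // placeholder base URL

async function chatWorkflow() {
  // 1. Create a new session for a user with a default model
  const session = await (await fetch(`${API}/sessions/`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ user_id: 'user-1234', model: 'gemini' })
  })).json();

  // 2. Send a message within that session
  const reply = await (await fetch(`${API}/sessions/${session.id}/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      prompt: 'What is the capital of France?',
      model: 'gemini',
      load_faiss_retriever: false
    })
  })).json();
  console.log(reply.answer);

  // 3. Retrieve the session's message history
  const history = await (await fetch(`${API}/sessions/${session.id}/messages`)).json();
  console.log(history.messages);
}

chatWorkflow().catch(console.error);
```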