diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ec8f767
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+.env
+**/.env
\ No newline at end of file
diff --git a/KickoffPlan.md b/KickoffPlan.md
new file mode 100644
index 0000000..3763a48
--- /dev/null
+++ b/KickoffPlan.md
@@ -0,0 +1,97 @@
+# **Kickoff Plan: AI Model Hub Service**
+
+This document outlines the plan for developing a central **"hub" service** that routes requests to various Large Language Models (LLMs) and uses **PostgreSQL** for metadata storage alongside **FAISS** for similarity search on vector data.
+
+---
+
+### **1. High-Level Architecture**
+
+The service will consist of three main components:
+
+1. **API Server**:
+   A web server that exposes endpoints to receive user prompts and return model responses. This will be the main entry point for all client applications.
+
+2. **LLM Router/Orchestrator**:
+   A core logic layer responsible for deciding which LLM (Gemini, DeepSeek, etc.) should handle a given request. It will also manage interactions with **PostgreSQL** and **FAISS**.
+
+3. **Vector Database (FAISS + PostgreSQL)**:
+   A two-layered database system:
+
+   * **FAISS**: Stores vectors (numerical representations of text). Handles high-performance similarity search.
+   * **PostgreSQL**: Stores metadata such as conversation IDs, document titles, timestamps, and other relational data.
+
+---
+
+### **2. Technology Stack**
+
+* **API Framework**:
+  **FastAPI (Python)** – High-performance, easy to learn, with automatic interactive documentation, ideal for testing and development.
+
+* **LLM Interaction**:
+  **LangChain** (or a similar abstraction library) – Simplifies communication with different LLM APIs by providing a unified interface.
+
+* **Vector Database**:
+
+  * **FAISS**: High-performance similarity search for vectors.
+  * **PostgreSQL**: Stores metadata for vectors, such as document IDs, user data, timestamps, etc. Used for filtering, organizing, and managing relational data.
+
+* **Deployment**:
+  **Docker** – Containerizing the application for portability, ensuring easy deployment across any machine within the local network.
+
+---
+
+### **3. Development Roadmap**
+
+#### **Phase 1: Core API and Model Integration** *(1-2 weeks)*
+
+* [ ] Set up a basic **FastAPI server**.
+* [ ] Create a `/chat` endpoint that accepts user prompts.
+* [ ] Implement basic **routing logic** to forward requests to one hardcoded LLM (e.g., Gemini).
+* [ ] Connect to the LLM's API and return the response to the user.
+
+#### **Phase 2: PostgreSQL and FAISS Integration** *(2-3 weeks)*
+
+* [ ] Integrate **PostgreSQL** for metadata storage (document IDs, timestamps, etc.).
+* [ ] Integrate **FAISS** for vector storage and similarity search.
+* [ ] On each API call, **embed the user prompt** and the model's response into vectors.
+* [ ] Store the vectors in **FAISS** and store associated metadata in **PostgreSQL** (such as document title, conversation ID).
+* [ ] Perform a **similarity search** using **FAISS** before sending a new prompt to the LLM, and include relevant history stored in **PostgreSQL** as context.
+
+#### **Phase 3: Multi-Model Routing & RAG** *(1-2 weeks)*
+
+* [ ] Abstract LLM connections to easily support multiple models (Gemini, DeepSeek, etc.).
+* [ ] Add logic to the `/chat` endpoint to allow clients to specify which model to use.
+* [ ] Create a separate endpoint (e.g., `/add-document`) to upload text files.
+* [ ] Implement a **RAG pipeline**:
+
+  * When a prompt is received, search **FAISS** for relevant vector matches and retrieve metadata from **PostgreSQL**.
+  * Pass the relevant document chunks along with the prompt to the selected LLM.
+
+#### **Phase 4: Refinement and Deployment** *(1 week)*
+
+* [ ] Develop a simple **UI** (optional, could use FastAPI's built-in docs).
+* [ ] Write **Dockerfiles** for the application.
+* [ ] Add **configuration management** for API keys and other settings.
+* [ ] Implement basic **logging** and **error handling**.
+
+---
+
+### **4. PostgreSQL + FAISS Workflow**
+
+* **Storing Vectors**:
+  When a document is added, its vector representation is stored in **FAISS**.
+  Metadata such as document titles, timestamps, and user IDs are stored in **PostgreSQL**.
+
+* **Querying**:
+  For a user query, embed the query into a vector.
+  Use **FAISS** to perform a similarity search and retrieve the nearest vectors.
+  Query **PostgreSQL** for metadata (e.g., title, author) related to the relevant vectors.
+
+* **Syncing Data**:
+  Ensure that metadata in **PostgreSQL** is synchronized with vectors in **FAISS** for accurate and consistent retrieval.
+
+---
+
+This update to the plan leverages **PostgreSQL** for metadata management while **FAISS** handles efficient similarity search. The integration allows you to query and filter both metadata and vectors in an optimized manner, ensuring scalability and flexibility for future features.
+
+Let me know if you'd like more specifics or adjustments!
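+
+---
+
+### **Appendix A (Illustrative): PostgreSQL + FAISS Retrieval Sketch**
+
+The snippet below is a minimal sketch of the Section 4 workflow, not part of the current codebase. It assumes a `sentence-transformers` embedding model, an in-memory flat FAISS index, and a hypothetical `documents` table (`faiss_id`, `title`, `content`) in PostgreSQL; none of these dependencies are in `requirements.txt` yet.
+
+```python
+# Illustrative sketch only: library choices, table name, and connection string are assumptions.
+import faiss
+import psycopg2
+from sentence_transformers import SentenceTransformer
+
+embedder = SentenceTransformer("all-MiniLM-L6-v2")    # 384-dimensional embeddings
+index = faiss.IndexFlatL2(384)                        # exact L2 search, no training step
+pg = psycopg2.connect("dbname=ai_hub user=ai_hub")    # hypothetical connection string
+
+def add_document(title: str, content: str) -> None:
+    """Store the vector in FAISS and the matching metadata row in PostgreSQL."""
+    vector = embedder.encode([content]).astype("float32")   # shape (1, 384)
+    faiss_id = index.ntotal                                  # next sequential FAISS position
+    index.add(vector)
+    with pg, pg.cursor() as cur:
+        cur.execute(
+            "INSERT INTO documents (faiss_id, title, content) VALUES (%s, %s, %s)",
+            (faiss_id, title, content),
+        )
+
+def retrieve_context(query: str, k: int = 3) -> list[dict]:
+    """Embed the query, search FAISS, then fetch metadata for the nearest vectors."""
+    vector = embedder.encode([query]).astype("float32")
+    _distances, ids = index.search(vector, k)                # ids has shape (1, k)
+    hits = [int(i) for i in ids[0] if i != -1]               # -1 marks empty result slots
+    if not hits:
+        return []
+    with pg, pg.cursor() as cur:
+        cur.execute(
+            "SELECT faiss_id, title, content FROM documents WHERE faiss_id = ANY(%s)",
+            (hits,),
+        )
+        rows = {r[0]: {"title": r[1], "content": r[2]} for r in cur.fetchall()}
+    return [rows[i] for i in hits if i in rows]              # keep FAISS ranking order
+```
+
+In Phase 2 this logic would sit behind the `/chat` and `/add-document` endpoints, with the FAISS index persisted to disk and its IDs kept in sync with the PostgreSQL rows.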
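+
+---
+
+### **Appendix B (Illustrative): Multi-Model Routing Sketch**
+
+A minimal sketch of the Phase 3 routing abstraction. The DeepSeek call mirrors `app/main.py`; the Gemini call uses the `google-generativeai` package already listed in `requirements.txt`. The environment-variable name `GEMINI_API_KEY` and the model IDs are placeholders, not settled decisions.
+
+```python
+# Illustrative sketch only: a thin dispatch table over the providers named in the plan.
+import os
+
+import google.generativeai as genai
+from openai import OpenAI
+
+deepseek = OpenAI(api_key=os.getenv("DEEPSEEK_API_KEY"), base_url="https://api.deepseek.com")
+genai.configure(api_key=os.getenv("GEMINI_API_KEY"))      # placeholder variable name
+
+def ask_deepseek(prompt: str) -> str:
+    completion = deepseek.chat.completions.create(
+        model="deepseek-chat",
+        messages=[{"role": "user", "content": prompt}],
+    )
+    return completion.choices[0].message.content
+
+def ask_gemini(prompt: str) -> str:
+    model = genai.GenerativeModel("gemini-1.5-flash")      # example model ID
+    return model.generate_content(prompt).text
+
+ROUTER = {"deepseek": ask_deepseek, "gemini": ask_gemini}
+
+def route_chat(prompt: str, model_name: str = "deepseek") -> str:
+    """Dispatch the prompt to whichever provider the client requested."""
+    try:
+        handler = ROUTER[model_name]
+    except KeyError:
+        raise ValueError(f"Unknown model '{model_name}'. Choose from: {sorted(ROUTER)}")
+    return handler(prompt)
+```
+
+In the service itself, `route_chat` would be called from the `/chat` handler once a `model` field is added to `ChatRequest`, keeping the endpoint contract unchanged for clients that omit it.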
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/README.md
diff --git a/ai-hub/Dockerfile b/ai-hub/Dockerfile
new file mode 100644
index 0000000..910cc1f
--- /dev/null
+++ b/ai-hub/Dockerfile
@@ -0,0 +1,23 @@
+# Dockerfile
+
+# 1. Use an official Python runtime as a parent image
+FROM python:3.11-slim
+
+# 2. Set the working directory inside the container
+WORKDIR /app
+
+# 3. Copy the dependency file and install dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# 4. Copy the application code into the container
+# Copy the application and test code
+COPY app/ ./app/
+COPY tests/ ./tests/
+
+# 5. Expose the port the app runs on
+EXPOSE 8000
+
+# 6. Define the command to run the application
+# --host 0.0.0.0 makes the server accessible from outside the container; the app package is copied to /app/app, so the import path is app.main
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/ai-hub/app/__init__.py b/ai-hub/app/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ai-hub/app/__init__.py
diff --git a/ai-hub/app/main.py b/ai-hub/app/main.py
new file mode 100644
index 0000000..58392c0
--- /dev/null
+++ b/ai-hub/app/main.py
@@ -0,0 +1,61 @@
+# main.py
+
+import os
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from dotenv import load_dotenv
+from openai import OpenAI
+
+# --- 1. Application Setup ---
+load_dotenv()
+app = FastAPI(
+    title="AI Model Hub Service",
+    description="A central hub to route requests to various LLMs.",
+    version="0.1.2",
+)
+
+# --- 2. Pydantic Models for Request/Response ---
+class ChatRequest(BaseModel):
+    prompt: str
+
+# --- 3. Configure DeepSeek API ---
+api_key = os.getenv("DEEPSEEK_API_KEY")
+if not api_key:
+    raise ValueError("DEEPSEEK_API_KEY not found in environment variables. Please set it in the .env file.")
+
+# MODIFIED: Updated base_url to match the primary documentation
+client = OpenAI(
+    api_key=api_key,
+    base_url="https://api.deepseek.com"
+)
+
+# --- 4. API Endpoint Definition ---
+@app.post("/chat")
+async def chat_handler(request: ChatRequest):
+    """
+    Accepts a user prompt and returns a response from the DeepSeek model.
+    """
+    try:
+        chat_completion = client.chat.completions.create(
+            model="deepseek-chat",
+            # MODIFIED: Added a system message for better model instruction
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": request.prompt},
+            ],
+            stream=False  # Explicitly setting stream to false as in the example
+        )
+
+        response_text = chat_completion.choices[0].message.content
+        return {"response": response_text}
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        raise HTTPException(status_code=500, detail="Failed to get response from the model.")
+
+@app.get("/")
+def read_root():
+    """
+    Root endpoint to confirm the server is running.
+    """
+    return {"status": "AI Model Hub is running!"}
\ No newline at end of file
diff --git a/ai-hub/docker-compose.yml b/ai-hub/docker-compose.yml
new file mode 100644
index 0000000..031b31d
--- /dev/null
+++ b/ai-hub/docker-compose.yml
@@ -0,0 +1,13 @@
+# docker-compose.yml
+
+version: '3.8'
+
+services:
+  ai-hub:
+    build: .
+    container_name: ai_hub_service
+    restart: unless-stopped
+    env_file:
+      - .env
+    ports:
+      - "8000:8000"
\ No newline at end of file
diff --git a/ai-hub/integration_tests/test_service.py b/ai-hub/integration_tests/test_service.py
new file mode 100644
index 0000000..7e77e25
--- /dev/null
+++ b/ai-hub/integration_tests/test_service.py
@@ -0,0 +1,62 @@
+# integration_tests/test_service.py
+
+import requests
+import os
+from dotenv import load_dotenv
+
+# --- Configuration ---
+# The base URL for our running service.
+# Note: We use http://ai-hub:8000 when running Docker-to-Docker,
+# but http://127.0.0.1:8000 when running from the host machine.
+# For simplicity, we will run this script from the host.
+BASE_URL = "http://127.0.0.1:8000"
+
+# Load the .env file to check if the API key is set
+load_dotenv()
+API_KEY = os.getenv("DEEPSEEK_API_KEY")
+
+def test_root_endpoint():
+    """Checks if the service is alive."""
+    print("Testing root endpoint...")
+    response = requests.get(f"{BASE_URL}/")
+
+    assert response.status_code == 200
+    assert response.json()["status"] == "AI Model Hub is running!"
+    print("Root endpoint test: PASSED")
+
+def test_chat_endpoint():
+    """
+    Sends a real prompt to the /chat endpoint and verifies a valid response.
+    This will make a REAL API call to DeepSeek and requires a valid key.
+    """
+    print("\nTesting /chat endpoint...")
+    if not API_KEY or "YOUR_API_KEY" in API_KEY:
+        print("SKIPPING test: DEEPSEEK_API_KEY not set in .env file.")
+        return
+
+    json_payload = {"prompt": "Explain what an integration test is in one sentence."}
+
+    try:
+        response = requests.post(f"{BASE_URL}/chat", json=json_payload, timeout=30)
+
+        # Check for successful HTTP status
+        assert response.status_code == 200
+
+        # Check the response body
+        data = response.json()
+        assert "response" in data
+        assert isinstance(data["response"], str)
+        assert len(data["response"]) > 0
+
+        print(f"Received response: '{data['response']}'")
+        print("/chat endpoint test: PASSED")
+
+    except requests.exceptions.RequestException as e:
+        print(f"/chat endpoint test: FAILED - {e}")
+        assert False, f"Request failed: {e}"
+
+if __name__ == "__main__":
+    print("--- Running Integration Tests ---")
+    test_root_endpoint()
+    test_chat_endpoint()
+    print("\n--- All tests completed ---")
\ No newline at end of file
+# Note: We use http://ai-hub:8000 when running Docker-to-Docker, +# but http://127.0.0.1:8000 when running from the host machine. +# For simplicity, we will run this script from the host. +BASE_URL = "http://127.0.0.1:8000" + +# Load the .env file to check if the API key is set +load_dotenv() +API_KEY = os.getenv("DEEPSEEK_API_KEY") + +def test_root_endpoint(): + """Checks if the service is alive.""" + print("Testing root endpoint...") + response = requests.get(f"{BASE_URL}/") + + assert response.status_code == 200 + assert response.json()["status"] == "AI Model Hub is running!" + print("Root endpoint test: PASSED") + +def test_chat_endpoint(): + """ + Sends a real prompt to the /chat endpoint and verifies a valid response. + This will make a REAL API call to DeepSeek and requires a valid key. + """ + print("\nTesting /chat endpoint...") + if not API_KEY or "YOUR_API_KEY" in API_KEY: + print("SKIPPING test: DEEPSEEK_API_KEY not set in .env file.") + return + + json_payload = {"prompt": "Explain what an integration test is in one sentence."} + + try: + response = requests.post(f"{BASE_URL}/chat", json=json_payload, timeout=30) + + # Check for successful HTTP status + assert response.status_code == 200 + + # Check the response body + data = response.json() + assert "response" in data + assert isinstance(data["response"], str) + assert len(data["response"]) > 0 + + print(f"Received response: '{data['response']}'") + print("/chat endpoint test: PASSED") + + except requests.exceptions.RequestException as e: + print(f"/chat endpoint test: FAILED - {e}") + assert False, f"Request failed: {e}" + +if __name__ == "__main__": + print("--- Running Integration Tests ---") + test_root_endpoint() + test_chat_endpoint() + print("\n--- All tests completed ---") \ No newline at end of file diff --git a/ai-hub/requirements.txt b/ai-hub/requirements.txt new file mode 100644 index 0000000..b45a893 --- /dev/null +++ b/ai-hub/requirements.txt @@ -0,0 +1,7 @@ +fastapi +uvicorn[standard] +google-generativeai +python-dotenv +openai +pytest +requests \ No newline at end of file
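One detail worth keeping in mind about the serve command: the Dockerfile above sets WORKDIR /app and copies the package into ./app/, so the application module is importable as app.main inside the container, and the uvicorn import string therefore needs to be "app.main:app" (the form the run_integration_tests.sh script further below already uses) rather than "main:app". A minimal sketch of an equivalent programmatic entry point, assuming a hypothetical top-level serve.py that is not part of the repo:

```python
# serve.py (hypothetical entry point, not part of the repo)
import uvicorn

if __name__ == "__main__":
    # Same import string the integration-test script passes to uvicorn:
    # the package was copied to /app/app, so the module path is app.main.
    uvicorn.run("app.main:app", host="0.0.0.0", port=8000)
```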
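Phase 2 of the kickoff plan above pairs FAISS for vector storage with PostgreSQL for metadata. A minimal sketch of that add/search flow, assuming faiss-cpu and numpy are installed; the random-vector embed() helper and the in-memory metadata dict are stand-ins for a real embedding model and the PostgreSQL table described in the plan:

```python
import numpy as np
import faiss

DIM = 384  # assumed embedding dimension

def embed(text: str) -> np.ndarray:
    # Stand-in for a real embedding model; returns a pseudo-random vector
    # derived from the text so the example stays self-contained.
    rng = np.random.default_rng(abs(hash(text)) % (2**32))
    return rng.random(DIM, dtype=np.float32)

# FAISS holds the vectors; the ids link each vector to a metadata record.
index = faiss.IndexIDMap(faiss.IndexFlatL2(DIM))
metadata = {}  # stand-in for the PostgreSQL metadata table, keyed by the same id

def add_document(doc_id: int, title: str, text: str) -> None:
    vec = embed(text).reshape(1, -1)
    index.add_with_ids(vec, np.array([doc_id], dtype=np.int64))
    metadata[doc_id] = {"title": title, "text": text}

def query(prompt: str, k: int = 3) -> list:
    vec = embed(prompt).reshape(1, -1)
    _, ids = index.search(vec, k)
    return [metadata[int(i)] for i in ids[0] if i != -1]

add_document(1, "Kickoff plan", "Route requests to several LLMs.")
add_document(2, "FAISS notes", "FAISS performs similarity search over vectors.")
print(query("similarity search", k=2))
```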
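Phase 3 plans to abstract the LLM connection so clients can choose a model per request. One possible shape for that abstraction, sketched with illustrative names (LLMProvider, route); the DeepSeek call simply mirrors the one already made in app/main.py:

```python
from abc import ABC, abstractmethod

class LLMProvider(ABC):
    """Interface the router would expose for each backend model."""

    @abstractmethod
    def chat(self, prompt: str) -> str:
        ...

class DeepSeekProvider(LLMProvider):
    def __init__(self, client):
        # The OpenAI-compatible client already configured in app/main.py.
        self.client = client

    def chat(self, prompt: str) -> str:
        completion = self.client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        return completion.choices[0].message.content

def route(providers: dict, model_name: str, prompt: str) -> str:
    """Dispatch the prompt to whichever provider the client asked for."""
    if model_name not in providers:
        raise ValueError(f"Unknown model: {model_name}")
    return providers[model_name].chat(prompt)

# Usage idea: route({"deepseek": DeepSeekProvider(client)}, "deepseek", "Hello")
```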
diff --git a/ai-hub/run_integration_tests.sh b/ai-hub/run_integration_tests.sh new file mode 100644 index 0000000..e5e4376 --- /dev/null +++ b/ai-hub/run_integration_tests.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# A script to automate running integration tests locally. +# It starts the FastAPI server, runs the tests, and then shuts down the server. + +echo "--- Starting AI Hub Server for Integration Tests ---" + +# Start the uvicorn server in the background +uvicorn app.main:app --host 127.0.0.1 --port 8000 & + +# Get the Process ID (PID) of the background server +SERVER_PID=$! + +# Define a cleanup function to be called on exit +cleanup() { + echo "--- Shutting Down Server (PID: $SERVER_PID) ---" + kill $SERVER_PID +} + +# Register the cleanup function to run when the script exits +# This ensures the server is stopped even if tests fail or script is interrupted (Ctrl+C) +trap cleanup EXIT + +echo "Server started with PID: $SERVER_PID.
Waiting for it to initialize..." + +# Wait a few seconds to ensure the server is fully up and running +sleep 5 + +echo "--- Running Integration Test Script ---" + +# Execute the Python integration test script +python3 integration_tests/test_service.py + +# Capture the exit code of the test script +TEST_EXIT_CODE=$? + +# The 'trap' will automatically call the cleanup function now. +# Exit with the same code as the test script (0 for success, non-zero for failure). +exit $TEST_EXIT_CODE \ No newline at end of file
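The fixed sleep 5 in run_integration_tests.sh assumes the server is ready within five seconds. A sketch of a readiness poll the integration-test script could run instead, using only the requests dependency it already imports (the function name is illustrative):

```python
import time
import requests

def wait_for_server(base_url: str = "http://127.0.0.1:8000", timeout: float = 30.0) -> bool:
    """Poll the root endpoint until the hub answers instead of sleeping a fixed time."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            if requests.get(f"{base_url}/", timeout=2).status_code == 200:
                return True
        except requests.exceptions.RequestException:
            pass  # server not accepting connections yet
        time.sleep(0.5)
    return False
```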
diff --git a/ai-hub/tests/__init__.py b/ai-hub/tests/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/ai-hub/tests/__init__.py
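Because app/main.py raises a ValueError at import time when DEEPSEEK_API_KEY is unset, the unit tests in tests/test_main.py below need the variable to exist even though the API call itself is mocked. A hypothetical tests/conftest.py (not part of the repo) could inject a placeholder key before the app module is imported:

```python
# tests/conftest.py (hypothetical, not part of the repo)
import os

# Give app.main a placeholder key so `from app.main import app` does not raise
# ValueError in environments (such as CI) where no real .env file exists.
os.environ.setdefault("DEEPSEEK_API_KEY", "dummy-key-for-unit-tests")
```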
diff --git a/ai-hub/tests/test_main.py b/ai-hub/tests/test_main.py new file mode 100644 index 0000000..e8ecf34 --- /dev/null +++ b/ai-hub/tests/test_main.py @@ -0,0 +1,46 @@ +# tests/test_main.py + +from fastapi.testclient import TestClient +from unittest.mock import patch, MagicMock +from app.main import app + +# Create a TestClient instance based on our FastAPI app +client = TestClient(app) + +def test_read_root(): + """Test the root endpoint to ensure it's running.""" + response = client.get("/") + assert response.status_code == 200 + assert response.json() == {"status": "AI Model Hub is running!"} + +@patch('app.main.client.chat.completions.create') +def test_chat_handler_success(mock_create): + """Test the /chat endpoint with a successful, mocked API call.""" + # Configure the mock to return a predictable response + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message = MagicMock() + mock_response.choices[0].message.content = "This is a mock response from DeepSeek." + mock_create.return_value = mock_response + + # Make the request to our app + response = client.post("/chat", json={"prompt": "Hello there"}) + + # Assert our app behaved as expected + assert response.status_code == 200 + assert response.json() == {"response": "This is a mock response from DeepSeek."} + # Verify that the mocked function was called + mock_create.assert_called_once() + +@patch('app.main.client.chat.completions.create') +def test_chat_handler_api_failure(mock_create): + """Test the /chat endpoint when the external API fails.""" + # Configure the mock to raise an exception + mock_create.side_effect = Exception("API connection error") + + # Make the request to our app + response = client.post("/chat", json={"prompt": "This request will fail"}) + + # Assert our app handles the error gracefully + assert response.status_code == 500 + assert response.json() == {"detail": "Failed to get response from the model."} \ No newline at end of file
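Phase 4 of the kickoff plan still calls for configuration management and basic logging. A small sketch of how the environment lookup currently done in app/main.py could be centralized as more settings appear; the module and function names here are illustrative, not part of the repo:

```python
# app/config.py (hypothetical helper, not part of the repo)
import logging
import os

from dotenv import load_dotenv

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ai-hub")

def get_required_env(name: str) -> str:
    """Load .env once and fail fast with a clear message when a key is missing."""
    load_dotenv()
    value = os.getenv(name)
    if not value:
        raise ValueError(f"{name} not found in environment variables. Please set it in the .env file.")
    return value

# app/main.py could then use: api_key = get_required_env("DEEPSEEK_API_KEY")
```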