diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index e357d57..69985ed 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -1,52 +1,63 @@ -import os -from contextlib import asynccontextmanager from fastapi import FastAPI +from contextlib import asynccontextmanager from typing import List -# Import core application logic +# Import centralized settings and other components +from app.config import settings from app.core.vector_store import FaissVectorStore from app.core.retrievers import FaissDBRetriever, Retriever from app.core.services import RAGService - -# Import the new files for database and API routes from app.db.session import create_db_and_tables from app.api.routes import create_api_router +from app.utils import print_config -# --- Application Factory Function --- +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Manages application startup and shutdown events. + - On startup, it creates database tables. + - On shutdown, it saves the FAISS index to disk. + """ + print("Application startup...") + print_config(settings) + create_db_and_tables() + yield + print("Application shutdown...") + # Access the vector_store from the application state to save it + if hasattr(app.state, 'vector_store'): + app.state.vector_store.save_index() + def create_app() -> FastAPI: """ Factory function to create and configure the FastAPI application. This encapsulates all setup logic, making the main entry point clean. """ - # Initialize core services for RAG - # CORRECTED: Now passing the required arguments to FaissVectorStore - vector_store = FaissVectorStore(index_file_path="data/faiss_index.bin", dimension=768) - retrievers: List[Retriever] = [ - FaissDBRetriever(vector_store=vector_store), - ] - rag_service = RAGService(vector_store=vector_store, retrievers=retrievers) - - @asynccontextmanager - async def lifespan(app: FastAPI): - """ - Initializes the database and vector store on startup and handles - cleanup on shutdown. - """ - print("Initializing application services...") - create_db_and_tables() - yield - print("Shutting down application services...") - vector_store.save_index() - app = FastAPI( - title="AI Model Hub Service", - description="A extensible hub to route requests to various LLMs with RAG capabilities.", - version="0.0.0", + # Use metadata from the central settings + title=settings.PROJECT_NAME, + version=settings.VERSION, + description="A modular API to route requests to various LLMs with RAG capabilities.", lifespan=lifespan ) - # Create and include the API router + # --- Initialize Core Services using settings --- + # Store services on the app.state object for easy access, e.g., in the lifespan manager. + app.state.vector_store = FaissVectorStore( + index_file_path=settings.FAISS_INDEX_PATH, + dimension=settings.EMBEDDING_DIMENSION + ) + + retrievers: List[Retriever] = [ + FaissDBRetriever(vector_store=app.state.vector_store), + ] + + rag_service = RAGService( + vector_store=app.state.vector_store, + retrievers=retrievers + ) + + # Create and include the API router, injecting the service api_router = create_api_router(rag_service=rag_service) app.include_router(api_router) - return app + return app \ No newline at end of file diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index e357d57..69985ed 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -1,52 +1,63 @@ -import os -from contextlib import asynccontextmanager from fastapi import FastAPI +from contextlib import asynccontextmanager from typing import List -# Import core application logic +# Import centralized settings and other components +from app.config import settings from app.core.vector_store import FaissVectorStore from app.core.retrievers import FaissDBRetriever, Retriever from app.core.services import RAGService - -# Import the new files for database and API routes from app.db.session import create_db_and_tables from app.api.routes import create_api_router +from app.utils import print_config -# --- Application Factory Function --- +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Manages application startup and shutdown events. + - On startup, it creates database tables. + - On shutdown, it saves the FAISS index to disk. + """ + print("Application startup...") + print_config(settings) + create_db_and_tables() + yield + print("Application shutdown...") + # Access the vector_store from the application state to save it + if hasattr(app.state, 'vector_store'): + app.state.vector_store.save_index() + def create_app() -> FastAPI: """ Factory function to create and configure the FastAPI application. This encapsulates all setup logic, making the main entry point clean. """ - # Initialize core services for RAG - # CORRECTED: Now passing the required arguments to FaissVectorStore - vector_store = FaissVectorStore(index_file_path="data/faiss_index.bin", dimension=768) - retrievers: List[Retriever] = [ - FaissDBRetriever(vector_store=vector_store), - ] - rag_service = RAGService(vector_store=vector_store, retrievers=retrievers) - - @asynccontextmanager - async def lifespan(app: FastAPI): - """ - Initializes the database and vector store on startup and handles - cleanup on shutdown. - """ - print("Initializing application services...") - create_db_and_tables() - yield - print("Shutting down application services...") - vector_store.save_index() - app = FastAPI( - title="AI Model Hub Service", - description="A extensible hub to route requests to various LLMs with RAG capabilities.", - version="0.0.0", + # Use metadata from the central settings + title=settings.PROJECT_NAME, + version=settings.VERSION, + description="A modular API to route requests to various LLMs with RAG capabilities.", lifespan=lifespan ) - # Create and include the API router + # --- Initialize Core Services using settings --- + # Store services on the app.state object for easy access, e.g., in the lifespan manager. + app.state.vector_store = FaissVectorStore( + index_file_path=settings.FAISS_INDEX_PATH, + dimension=settings.EMBEDDING_DIMENSION + ) + + retrievers: List[Retriever] = [ + FaissDBRetriever(vector_store=app.state.vector_store), + ] + + rag_service = RAGService( + vector_store=app.state.vector_store, + retrievers=retrievers + ) + + # Create and include the API router, injecting the service api_router = create_api_router(rag_service=rag_service) app.include_router(api_router) - return app + return app \ No newline at end of file diff --git a/ai-hub/app/config.py b/ai-hub/app/config.py index ab8f8f4..454ce79 100644 --- a/ai-hub/app/config.py +++ b/ai-hub/app/config.py @@ -1,29 +1,82 @@ -# app/config.py - import os +import yaml from dotenv import load_dotenv -# Load environment variables from a .env file +# Load .env file at the module level load_dotenv() -class Settings: - # --- Database --- - DB_MODE: str = os.getenv("DB_MODE", "sqlite") - DATABASE_URL: str = ( - "sqlite:///./data/ai_hub.db" - if DB_MODE == "sqlite" - else os.getenv("DATABASE_URL", "postgresql://user:password@localhost/ai_hub_db") - ) - - # --- LLM API Keys & Models --- - DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") - GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") - DEEPSEEK_MODEL_NAME: str = os.getenv("DEEPSEEK_MODEL_NAME", "deepseek-chat") - GEMINI_MODEL_NAME: str = os.getenv("GEMINI_MODEL_NAME", "gemini-1.5-flash-latest") - - # --- Vector Store --- - FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "data/faiss_index.bin") - EMBEDDING_DIMENSION: int = int(os.getenv("EMBEDDING_DIMENSION", 768)) +def _load_yaml_config(path: str) -> dict: + """Loads a YAML config file if it exists, otherwise returns an empty dict.""" + if os.path.exists(path): + print(f"✅ Loading configuration from {path}") + with open(path, 'r') as f: + return yaml.safe_load(f) or {} + print(f"⚠️ '{path}' not found. Using environment variables and defaults.") + return {} -# Instantiate the settings so they can be imported and used anywhere +class Settings: + """ + Holds all application settings, calculated once on initialization. + Priority: YAML > Environment Variables > Defaults. + """ + def __init__(self): + # 1. Load the YAML configuration data + config_path = os.getenv("CONFIG_PATH", "config.yaml") + yaml_config = _load_yaml_config(config_path) + + # 2. Define a helper to safely get nested values from the loaded YAML + def get_from_yaml(keys): + d = yaml_config + for key in keys: + d = d.get(key) if isinstance(d, dict) else None + return d + + # 3. Set all attributes with the correct priority + # --- Application --- + self.PROJECT_NAME: str = get_from_yaml(["application", "project_name"]) or \ + os.getenv("PROJECT_NAME") or \ + "Cortex Hub" + self.VERSION: str = get_from_yaml(["application", "version"]) or "1.0.0" + + # --- Database --- + self.DB_MODE: str = get_from_yaml(["database", "mode"]) or \ + os.getenv("DB_MODE") or \ + "sqlite" + + if self.DB_MODE == "sqlite": + self.DATABASE_URL: str = "sqlite:///./data/ai_hub.db" + else: + self.DATABASE_URL: str = get_from_yaml(["database", "url"]) or \ + os.getenv("DATABASE_URL") or \ + "postgresql://user:password@localhost/ai_hub_db" + + # --- LLM API Keys & Models (Secrets should come from ENV) --- + self.DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") + self.GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") + + # Fail fast if required secrets are missing or empty + if not self.DEEPSEEK_API_KEY: + raise ValueError("DEEPSEEK_API_KEY is not set or is empty in the environment.") + if not self.GEMINI_API_KEY: + raise ValueError("GEMINI_API_KEY is not set or is empty in the environment.") + + self.DEEPSEEK_MODEL_NAME: str = get_from_yaml(["llm_providers", "deepseek_model_name"]) or \ + os.getenv("DEEPSEEK_MODEL_NAME") or \ + "deepseek-chat" + self.GEMINI_MODEL_NAME: str = get_from_yaml(["llm_providers", "gemini_model_name"]) or \ + os.getenv("GEMINI_MODEL_NAME") or \ + "gemini-1.5-flash-latest" + + # --- Vector Store --- + self.FAISS_INDEX_PATH: str = get_from_yaml(["vector_store", "index_path"]) or \ + os.getenv("FAISS_INDEX_PATH") or \ + "data/faiss_index.bin" + dimension_str = get_from_yaml(["vector_store", "embedding_dimension"]) or \ + os.getenv("EMBEDDING_DIMENSION") or \ + "768" + self.EMBEDDING_DIMENSION: int = int(dimension_str) + + +# Instantiate a single, importable settings object for the entire application. +# The __init__ method runs once, and all values are now set. settings = Settings() \ No newline at end of file diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index e357d57..69985ed 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -1,52 +1,63 @@ -import os -from contextlib import asynccontextmanager from fastapi import FastAPI +from contextlib import asynccontextmanager from typing import List -# Import core application logic +# Import centralized settings and other components +from app.config import settings from app.core.vector_store import FaissVectorStore from app.core.retrievers import FaissDBRetriever, Retriever from app.core.services import RAGService - -# Import the new files for database and API routes from app.db.session import create_db_and_tables from app.api.routes import create_api_router +from app.utils import print_config -# --- Application Factory Function --- +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Manages application startup and shutdown events. + - On startup, it creates database tables. + - On shutdown, it saves the FAISS index to disk. + """ + print("Application startup...") + print_config(settings) + create_db_and_tables() + yield + print("Application shutdown...") + # Access the vector_store from the application state to save it + if hasattr(app.state, 'vector_store'): + app.state.vector_store.save_index() + def create_app() -> FastAPI: """ Factory function to create and configure the FastAPI application. This encapsulates all setup logic, making the main entry point clean. """ - # Initialize core services for RAG - # CORRECTED: Now passing the required arguments to FaissVectorStore - vector_store = FaissVectorStore(index_file_path="data/faiss_index.bin", dimension=768) - retrievers: List[Retriever] = [ - FaissDBRetriever(vector_store=vector_store), - ] - rag_service = RAGService(vector_store=vector_store, retrievers=retrievers) - - @asynccontextmanager - async def lifespan(app: FastAPI): - """ - Initializes the database and vector store on startup and handles - cleanup on shutdown. - """ - print("Initializing application services...") - create_db_and_tables() - yield - print("Shutting down application services...") - vector_store.save_index() - app = FastAPI( - title="AI Model Hub Service", - description="A extensible hub to route requests to various LLMs with RAG capabilities.", - version="0.0.0", + # Use metadata from the central settings + title=settings.PROJECT_NAME, + version=settings.VERSION, + description="A modular API to route requests to various LLMs with RAG capabilities.", lifespan=lifespan ) - # Create and include the API router + # --- Initialize Core Services using settings --- + # Store services on the app.state object for easy access, e.g., in the lifespan manager. + app.state.vector_store = FaissVectorStore( + index_file_path=settings.FAISS_INDEX_PATH, + dimension=settings.EMBEDDING_DIMENSION + ) + + retrievers: List[Retriever] = [ + FaissDBRetriever(vector_store=app.state.vector_store), + ] + + rag_service = RAGService( + vector_store=app.state.vector_store, + retrievers=retrievers + ) + + # Create and include the API router, injecting the service api_router = create_api_router(rag_service=rag_service) app.include_router(api_router) - return app + return app \ No newline at end of file diff --git a/ai-hub/app/config.py b/ai-hub/app/config.py index ab8f8f4..454ce79 100644 --- a/ai-hub/app/config.py +++ b/ai-hub/app/config.py @@ -1,29 +1,82 @@ -# app/config.py - import os +import yaml from dotenv import load_dotenv -# Load environment variables from a .env file +# Load .env file at the module level load_dotenv() -class Settings: - # --- Database --- - DB_MODE: str = os.getenv("DB_MODE", "sqlite") - DATABASE_URL: str = ( - "sqlite:///./data/ai_hub.db" - if DB_MODE == "sqlite" - else os.getenv("DATABASE_URL", "postgresql://user:password@localhost/ai_hub_db") - ) - - # --- LLM API Keys & Models --- - DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") - GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") - DEEPSEEK_MODEL_NAME: str = os.getenv("DEEPSEEK_MODEL_NAME", "deepseek-chat") - GEMINI_MODEL_NAME: str = os.getenv("GEMINI_MODEL_NAME", "gemini-1.5-flash-latest") - - # --- Vector Store --- - FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "data/faiss_index.bin") - EMBEDDING_DIMENSION: int = int(os.getenv("EMBEDDING_DIMENSION", 768)) +def _load_yaml_config(path: str) -> dict: + """Loads a YAML config file if it exists, otherwise returns an empty dict.""" + if os.path.exists(path): + print(f"✅ Loading configuration from {path}") + with open(path, 'r') as f: + return yaml.safe_load(f) or {} + print(f"⚠️ '{path}' not found. Using environment variables and defaults.") + return {} -# Instantiate the settings so they can be imported and used anywhere +class Settings: + """ + Holds all application settings, calculated once on initialization. + Priority: YAML > Environment Variables > Defaults. + """ + def __init__(self): + # 1. Load the YAML configuration data + config_path = os.getenv("CONFIG_PATH", "config.yaml") + yaml_config = _load_yaml_config(config_path) + + # 2. Define a helper to safely get nested values from the loaded YAML + def get_from_yaml(keys): + d = yaml_config + for key in keys: + d = d.get(key) if isinstance(d, dict) else None + return d + + # 3. Set all attributes with the correct priority + # --- Application --- + self.PROJECT_NAME: str = get_from_yaml(["application", "project_name"]) or \ + os.getenv("PROJECT_NAME") or \ + "Cortex Hub" + self.VERSION: str = get_from_yaml(["application", "version"]) or "1.0.0" + + # --- Database --- + self.DB_MODE: str = get_from_yaml(["database", "mode"]) or \ + os.getenv("DB_MODE") or \ + "sqlite" + + if self.DB_MODE == "sqlite": + self.DATABASE_URL: str = "sqlite:///./data/ai_hub.db" + else: + self.DATABASE_URL: str = get_from_yaml(["database", "url"]) or \ + os.getenv("DATABASE_URL") or \ + "postgresql://user:password@localhost/ai_hub_db" + + # --- LLM API Keys & Models (Secrets should come from ENV) --- + self.DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") + self.GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") + + # Fail fast if required secrets are missing or empty + if not self.DEEPSEEK_API_KEY: + raise ValueError("DEEPSEEK_API_KEY is not set or is empty in the environment.") + if not self.GEMINI_API_KEY: + raise ValueError("GEMINI_API_KEY is not set or is empty in the environment.") + + self.DEEPSEEK_MODEL_NAME: str = get_from_yaml(["llm_providers", "deepseek_model_name"]) or \ + os.getenv("DEEPSEEK_MODEL_NAME") or \ + "deepseek-chat" + self.GEMINI_MODEL_NAME: str = get_from_yaml(["llm_providers", "gemini_model_name"]) or \ + os.getenv("GEMINI_MODEL_NAME") or \ + "gemini-1.5-flash-latest" + + # --- Vector Store --- + self.FAISS_INDEX_PATH: str = get_from_yaml(["vector_store", "index_path"]) or \ + os.getenv("FAISS_INDEX_PATH") or \ + "data/faiss_index.bin" + dimension_str = get_from_yaml(["vector_store", "embedding_dimension"]) or \ + os.getenv("EMBEDDING_DIMENSION") or \ + "768" + self.EMBEDDING_DIMENSION: int = int(dimension_str) + + +# Instantiate a single, importable settings object for the entire application. +# The __init__ method runs once, and all values are now set. settings = Settings() \ No newline at end of file diff --git a/ai-hub/app/config.yaml b/ai-hub/app/config.yaml new file mode 100644 index 0000000..c03d2da --- /dev/null +++ b/ai-hub/app/config.yaml @@ -0,0 +1,25 @@ +# Default application configuration for Cortex Hub +application: + project_name: "Cortex Hub - AI Model Service" + version: "1.0.0" + +database: + # The mode can be 'sqlite' or 'postgres'. + # This can be overridden by the DB_MODE environment variable. + mode: "sqlite" + + # The connection string for the database. + # This can be overridden by the DATABASE_URL environment variable. + url: "sqlite:///./data/ai_hub.db" + +llm_providers: + # Default model names for the LLM providers. + # These can be overridden by environment variables like DEEPSEEK_MODEL_NAME. + deepseek_model_name: "deepseek-chat" + gemini_model_name: "gemini-1.5-flash-latest" + +vector_store: + # Path to the FAISS index file. + index_path: "data/faiss_index.bin" + # The dimension of the sentence embeddings. + embedding_dimension: 768 \ No newline at end of file diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index e357d57..69985ed 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -1,52 +1,63 @@ -import os -from contextlib import asynccontextmanager from fastapi import FastAPI +from contextlib import asynccontextmanager from typing import List -# Import core application logic +# Import centralized settings and other components +from app.config import settings from app.core.vector_store import FaissVectorStore from app.core.retrievers import FaissDBRetriever, Retriever from app.core.services import RAGService - -# Import the new files for database and API routes from app.db.session import create_db_and_tables from app.api.routes import create_api_router +from app.utils import print_config -# --- Application Factory Function --- +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Manages application startup and shutdown events. + - On startup, it creates database tables. + - On shutdown, it saves the FAISS index to disk. + """ + print("Application startup...") + print_config(settings) + create_db_and_tables() + yield + print("Application shutdown...") + # Access the vector_store from the application state to save it + if hasattr(app.state, 'vector_store'): + app.state.vector_store.save_index() + def create_app() -> FastAPI: """ Factory function to create and configure the FastAPI application. This encapsulates all setup logic, making the main entry point clean. """ - # Initialize core services for RAG - # CORRECTED: Now passing the required arguments to FaissVectorStore - vector_store = FaissVectorStore(index_file_path="data/faiss_index.bin", dimension=768) - retrievers: List[Retriever] = [ - FaissDBRetriever(vector_store=vector_store), - ] - rag_service = RAGService(vector_store=vector_store, retrievers=retrievers) - - @asynccontextmanager - async def lifespan(app: FastAPI): - """ - Initializes the database and vector store on startup and handles - cleanup on shutdown. - """ - print("Initializing application services...") - create_db_and_tables() - yield - print("Shutting down application services...") - vector_store.save_index() - app = FastAPI( - title="AI Model Hub Service", - description="A extensible hub to route requests to various LLMs with RAG capabilities.", - version="0.0.0", + # Use metadata from the central settings + title=settings.PROJECT_NAME, + version=settings.VERSION, + description="A modular API to route requests to various LLMs with RAG capabilities.", lifespan=lifespan ) - # Create and include the API router + # --- Initialize Core Services using settings --- + # Store services on the app.state object for easy access, e.g., in the lifespan manager. + app.state.vector_store = FaissVectorStore( + index_file_path=settings.FAISS_INDEX_PATH, + dimension=settings.EMBEDDING_DIMENSION + ) + + retrievers: List[Retriever] = [ + FaissDBRetriever(vector_store=app.state.vector_store), + ] + + rag_service = RAGService( + vector_store=app.state.vector_store, + retrievers=retrievers + ) + + # Create and include the API router, injecting the service api_router = create_api_router(rag_service=rag_service) app.include_router(api_router) - return app + return app \ No newline at end of file diff --git a/ai-hub/app/config.py b/ai-hub/app/config.py index ab8f8f4..454ce79 100644 --- a/ai-hub/app/config.py +++ b/ai-hub/app/config.py @@ -1,29 +1,82 @@ -# app/config.py - import os +import yaml from dotenv import load_dotenv -# Load environment variables from a .env file +# Load .env file at the module level load_dotenv() -class Settings: - # --- Database --- - DB_MODE: str = os.getenv("DB_MODE", "sqlite") - DATABASE_URL: str = ( - "sqlite:///./data/ai_hub.db" - if DB_MODE == "sqlite" - else os.getenv("DATABASE_URL", "postgresql://user:password@localhost/ai_hub_db") - ) - - # --- LLM API Keys & Models --- - DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") - GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") - DEEPSEEK_MODEL_NAME: str = os.getenv("DEEPSEEK_MODEL_NAME", "deepseek-chat") - GEMINI_MODEL_NAME: str = os.getenv("GEMINI_MODEL_NAME", "gemini-1.5-flash-latest") - - # --- Vector Store --- - FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "data/faiss_index.bin") - EMBEDDING_DIMENSION: int = int(os.getenv("EMBEDDING_DIMENSION", 768)) +def _load_yaml_config(path: str) -> dict: + """Loads a YAML config file if it exists, otherwise returns an empty dict.""" + if os.path.exists(path): + print(f"✅ Loading configuration from {path}") + with open(path, 'r') as f: + return yaml.safe_load(f) or {} + print(f"⚠️ '{path}' not found. Using environment variables and defaults.") + return {} -# Instantiate the settings so they can be imported and used anywhere +class Settings: + """ + Holds all application settings, calculated once on initialization. + Priority: YAML > Environment Variables > Defaults. + """ + def __init__(self): + # 1. Load the YAML configuration data + config_path = os.getenv("CONFIG_PATH", "config.yaml") + yaml_config = _load_yaml_config(config_path) + + # 2. Define a helper to safely get nested values from the loaded YAML + def get_from_yaml(keys): + d = yaml_config + for key in keys: + d = d.get(key) if isinstance(d, dict) else None + return d + + # 3. Set all attributes with the correct priority + # --- Application --- + self.PROJECT_NAME: str = get_from_yaml(["application", "project_name"]) or \ + os.getenv("PROJECT_NAME") or \ + "Cortex Hub" + self.VERSION: str = get_from_yaml(["application", "version"]) or "1.0.0" + + # --- Database --- + self.DB_MODE: str = get_from_yaml(["database", "mode"]) or \ + os.getenv("DB_MODE") or \ + "sqlite" + + if self.DB_MODE == "sqlite": + self.DATABASE_URL: str = "sqlite:///./data/ai_hub.db" + else: + self.DATABASE_URL: str = get_from_yaml(["database", "url"]) or \ + os.getenv("DATABASE_URL") or \ + "postgresql://user:password@localhost/ai_hub_db" + + # --- LLM API Keys & Models (Secrets should come from ENV) --- + self.DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") + self.GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") + + # Fail fast if required secrets are missing or empty + if not self.DEEPSEEK_API_KEY: + raise ValueError("DEEPSEEK_API_KEY is not set or is empty in the environment.") + if not self.GEMINI_API_KEY: + raise ValueError("GEMINI_API_KEY is not set or is empty in the environment.") + + self.DEEPSEEK_MODEL_NAME: str = get_from_yaml(["llm_providers", "deepseek_model_name"]) or \ + os.getenv("DEEPSEEK_MODEL_NAME") or \ + "deepseek-chat" + self.GEMINI_MODEL_NAME: str = get_from_yaml(["llm_providers", "gemini_model_name"]) or \ + os.getenv("GEMINI_MODEL_NAME") or \ + "gemini-1.5-flash-latest" + + # --- Vector Store --- + self.FAISS_INDEX_PATH: str = get_from_yaml(["vector_store", "index_path"]) or \ + os.getenv("FAISS_INDEX_PATH") or \ + "data/faiss_index.bin" + dimension_str = get_from_yaml(["vector_store", "embedding_dimension"]) or \ + os.getenv("EMBEDDING_DIMENSION") or \ + "768" + self.EMBEDDING_DIMENSION: int = int(dimension_str) + + +# Instantiate a single, importable settings object for the entire application. +# The __init__ method runs once, and all values are now set. settings = Settings() \ No newline at end of file diff --git a/ai-hub/app/config.yaml b/ai-hub/app/config.yaml new file mode 100644 index 0000000..c03d2da --- /dev/null +++ b/ai-hub/app/config.yaml @@ -0,0 +1,25 @@ +# Default application configuration for Cortex Hub +application: + project_name: "Cortex Hub - AI Model Service" + version: "1.0.0" + +database: + # The mode can be 'sqlite' or 'postgres'. + # This can be overridden by the DB_MODE environment variable. + mode: "sqlite" + + # The connection string for the database. + # This can be overridden by the DATABASE_URL environment variable. + url: "sqlite:///./data/ai_hub.db" + +llm_providers: + # Default model names for the LLM providers. + # These can be overridden by environment variables like DEEPSEEK_MODEL_NAME. + deepseek_model_name: "deepseek-chat" + gemini_model_name: "gemini-1.5-flash-latest" + +vector_store: + # Path to the FAISS index file. + index_path: "data/faiss_index.bin" + # The dimension of the sentence embeddings. + embedding_dimension: 768 \ No newline at end of file diff --git a/ai-hub/app/utils.py b/ai-hub/app/utils.py new file mode 100644 index 0000000..a61449c --- /dev/null +++ b/ai-hub/app/utils.py @@ -0,0 +1,32 @@ +import os + +def print_config(settings_obj): + """ + Prints the application's configuration to the console, masking sensitive values. + """ + print("--- ⚙️ Application Configuration ---") + + sensitive_keywords = ["KEY", "TOKEN", "SECRET", "PASSWORD"] + + # Use sorted(dir(...)) to ensure a consistent output order for tests + for key in sorted(dir(settings_obj)): + # Skip private/dunder methods + if key.startswith('_'): + continue + + value = getattr(settings_obj, key) + + # **CRITICAL FIX**: Skip attributes that are methods/functions + if callable(value): + continue + + if any(k in key for k in sensitive_keywords): + if isinstance(value, str) and len(value) > 8: + masked_value = f"{value[:4]}...{value[-4:]}" + else: + masked_value = "***" + print(f" - {key}: {masked_value}") + else: + print(f" - {key}: {value}") + + print("------------------------------------") \ No newline at end of file diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index e357d57..69985ed 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -1,52 +1,63 @@ -import os -from contextlib import asynccontextmanager from fastapi import FastAPI +from contextlib import asynccontextmanager from typing import List -# Import core application logic +# Import centralized settings and other components +from app.config import settings from app.core.vector_store import FaissVectorStore from app.core.retrievers import FaissDBRetriever, Retriever from app.core.services import RAGService - -# Import the new files for database and API routes from app.db.session import create_db_and_tables from app.api.routes import create_api_router +from app.utils import print_config -# --- Application Factory Function --- +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Manages application startup and shutdown events. + - On startup, it creates database tables. + - On shutdown, it saves the FAISS index to disk. + """ + print("Application startup...") + print_config(settings) + create_db_and_tables() + yield + print("Application shutdown...") + # Access the vector_store from the application state to save it + if hasattr(app.state, 'vector_store'): + app.state.vector_store.save_index() + def create_app() -> FastAPI: """ Factory function to create and configure the FastAPI application. This encapsulates all setup logic, making the main entry point clean. """ - # Initialize core services for RAG - # CORRECTED: Now passing the required arguments to FaissVectorStore - vector_store = FaissVectorStore(index_file_path="data/faiss_index.bin", dimension=768) - retrievers: List[Retriever] = [ - FaissDBRetriever(vector_store=vector_store), - ] - rag_service = RAGService(vector_store=vector_store, retrievers=retrievers) - - @asynccontextmanager - async def lifespan(app: FastAPI): - """ - Initializes the database and vector store on startup and handles - cleanup on shutdown. - """ - print("Initializing application services...") - create_db_and_tables() - yield - print("Shutting down application services...") - vector_store.save_index() - app = FastAPI( - title="AI Model Hub Service", - description="A extensible hub to route requests to various LLMs with RAG capabilities.", - version="0.0.0", + # Use metadata from the central settings + title=settings.PROJECT_NAME, + version=settings.VERSION, + description="A modular API to route requests to various LLMs with RAG capabilities.", lifespan=lifespan ) - # Create and include the API router + # --- Initialize Core Services using settings --- + # Store services on the app.state object for easy access, e.g., in the lifespan manager. + app.state.vector_store = FaissVectorStore( + index_file_path=settings.FAISS_INDEX_PATH, + dimension=settings.EMBEDDING_DIMENSION + ) + + retrievers: List[Retriever] = [ + FaissDBRetriever(vector_store=app.state.vector_store), + ] + + rag_service = RAGService( + vector_store=app.state.vector_store, + retrievers=retrievers + ) + + # Create and include the API router, injecting the service api_router = create_api_router(rag_service=rag_service) app.include_router(api_router) - return app + return app \ No newline at end of file diff --git a/ai-hub/app/config.py b/ai-hub/app/config.py index ab8f8f4..454ce79 100644 --- a/ai-hub/app/config.py +++ b/ai-hub/app/config.py @@ -1,29 +1,82 @@ -# app/config.py - import os +import yaml from dotenv import load_dotenv -# Load environment variables from a .env file +# Load .env file at the module level load_dotenv() -class Settings: - # --- Database --- - DB_MODE: str = os.getenv("DB_MODE", "sqlite") - DATABASE_URL: str = ( - "sqlite:///./data/ai_hub.db" - if DB_MODE == "sqlite" - else os.getenv("DATABASE_URL", "postgresql://user:password@localhost/ai_hub_db") - ) - - # --- LLM API Keys & Models --- - DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") - GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") - DEEPSEEK_MODEL_NAME: str = os.getenv("DEEPSEEK_MODEL_NAME", "deepseek-chat") - GEMINI_MODEL_NAME: str = os.getenv("GEMINI_MODEL_NAME", "gemini-1.5-flash-latest") - - # --- Vector Store --- - FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "data/faiss_index.bin") - EMBEDDING_DIMENSION: int = int(os.getenv("EMBEDDING_DIMENSION", 768)) +def _load_yaml_config(path: str) -> dict: + """Loads a YAML config file if it exists, otherwise returns an empty dict.""" + if os.path.exists(path): + print(f"✅ Loading configuration from {path}") + with open(path, 'r') as f: + return yaml.safe_load(f) or {} + print(f"⚠️ '{path}' not found. Using environment variables and defaults.") + return {} -# Instantiate the settings so they can be imported and used anywhere +class Settings: + """ + Holds all application settings, calculated once on initialization. + Priority: YAML > Environment Variables > Defaults. + """ + def __init__(self): + # 1. Load the YAML configuration data + config_path = os.getenv("CONFIG_PATH", "config.yaml") + yaml_config = _load_yaml_config(config_path) + + # 2. Define a helper to safely get nested values from the loaded YAML + def get_from_yaml(keys): + d = yaml_config + for key in keys: + d = d.get(key) if isinstance(d, dict) else None + return d + + # 3. Set all attributes with the correct priority + # --- Application --- + self.PROJECT_NAME: str = get_from_yaml(["application", "project_name"]) or \ + os.getenv("PROJECT_NAME") or \ + "Cortex Hub" + self.VERSION: str = get_from_yaml(["application", "version"]) or "1.0.0" + + # --- Database --- + self.DB_MODE: str = get_from_yaml(["database", "mode"]) or \ + os.getenv("DB_MODE") or \ + "sqlite" + + if self.DB_MODE == "sqlite": + self.DATABASE_URL: str = "sqlite:///./data/ai_hub.db" + else: + self.DATABASE_URL: str = get_from_yaml(["database", "url"]) or \ + os.getenv("DATABASE_URL") or \ + "postgresql://user:password@localhost/ai_hub_db" + + # --- LLM API Keys & Models (Secrets should come from ENV) --- + self.DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") + self.GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") + + # Fail fast if required secrets are missing or empty + if not self.DEEPSEEK_API_KEY: + raise ValueError("DEEPSEEK_API_KEY is not set or is empty in the environment.") + if not self.GEMINI_API_KEY: + raise ValueError("GEMINI_API_KEY is not set or is empty in the environment.") + + self.DEEPSEEK_MODEL_NAME: str = get_from_yaml(["llm_providers", "deepseek_model_name"]) or \ + os.getenv("DEEPSEEK_MODEL_NAME") or \ + "deepseek-chat" + self.GEMINI_MODEL_NAME: str = get_from_yaml(["llm_providers", "gemini_model_name"]) or \ + os.getenv("GEMINI_MODEL_NAME") or \ + "gemini-1.5-flash-latest" + + # --- Vector Store --- + self.FAISS_INDEX_PATH: str = get_from_yaml(["vector_store", "index_path"]) or \ + os.getenv("FAISS_INDEX_PATH") or \ + "data/faiss_index.bin" + dimension_str = get_from_yaml(["vector_store", "embedding_dimension"]) or \ + os.getenv("EMBEDDING_DIMENSION") or \ + "768" + self.EMBEDDING_DIMENSION: int = int(dimension_str) + + +# Instantiate a single, importable settings object for the entire application. +# The __init__ method runs once, and all values are now set. settings = Settings() \ No newline at end of file diff --git a/ai-hub/app/config.yaml b/ai-hub/app/config.yaml new file mode 100644 index 0000000..c03d2da --- /dev/null +++ b/ai-hub/app/config.yaml @@ -0,0 +1,25 @@ +# Default application configuration for Cortex Hub +application: + project_name: "Cortex Hub - AI Model Service" + version: "1.0.0" + +database: + # The mode can be 'sqlite' or 'postgres'. + # This can be overridden by the DB_MODE environment variable. + mode: "sqlite" + + # The connection string for the database. + # This can be overridden by the DATABASE_URL environment variable. + url: "sqlite:///./data/ai_hub.db" + +llm_providers: + # Default model names for the LLM providers. + # These can be overridden by environment variables like DEEPSEEK_MODEL_NAME. + deepseek_model_name: "deepseek-chat" + gemini_model_name: "gemini-1.5-flash-latest" + +vector_store: + # Path to the FAISS index file. + index_path: "data/faiss_index.bin" + # The dimension of the sentence embeddings. + embedding_dimension: 768 \ No newline at end of file diff --git a/ai-hub/app/utils.py b/ai-hub/app/utils.py new file mode 100644 index 0000000..a61449c --- /dev/null +++ b/ai-hub/app/utils.py @@ -0,0 +1,32 @@ +import os + +def print_config(settings_obj): + """ + Prints the application's configuration to the console, masking sensitive values. + """ + print("--- ⚙️ Application Configuration ---") + + sensitive_keywords = ["KEY", "TOKEN", "SECRET", "PASSWORD"] + + # Use sorted(dir(...)) to ensure a consistent output order for tests + for key in sorted(dir(settings_obj)): + # Skip private/dunder methods + if key.startswith('_'): + continue + + value = getattr(settings_obj, key) + + # **CRITICAL FIX**: Skip attributes that are methods/functions + if callable(value): + continue + + if any(k in key for k in sensitive_keywords): + if isinstance(value, str) and len(value) > 8: + masked_value = f"{value[:4]}...{value[-4:]}" + else: + masked_value = "***" + print(f" - {key}: {masked_value}") + else: + print(f" - {key}: {value}") + + print("------------------------------------") \ No newline at end of file diff --git a/ai-hub/tests/test_config.py b/ai-hub/tests/test_config.py new file mode 100644 index 0000000..ae6a642 --- /dev/null +++ b/ai-hub/tests/test_config.py @@ -0,0 +1,62 @@ +import pytest +import importlib +import yaml + +@pytest.fixture +def tmp_config_file(tmp_path): + """Creates a temporary config.yaml file and returns its path.""" + config_content = { + "application": {"project_name": "Test Project from YAML"}, + "llm_providers": {"deepseek_model_name": "deepseek-from-yaml"} + } + config_path = tmp_path / "test_config.yaml" + with open(config_path, 'w') as f: + yaml.dump(config_content, f) + return str(config_path) + +def test_yaml_overrides_env_var(monkeypatch, tmp_config_file): + """ + Tests that a value from the YAML file has priority over an environment variable. + """ + # Arrange: Set BOTH a YAML value and an environment variable + monkeypatch.setenv("CONFIG_PATH", tmp_config_file) + monkeypatch.setenv("DEEPSEEK_MODEL_NAME", "deepseek-from-env") # This should be ignored + + # Act + from app import config + importlib.reload(config) + + # Assert: The value from the YAML file should be used + assert config.settings.DEEPSEEK_MODEL_NAME == "deepseek-from-yaml" + +def test_env_var_overrides_default(monkeypatch): + """ + Tests that an environment variable overrides the hardcoded default when + the YAML file is missing or doesn't contain the key. + """ + # Arrange: Point to a non-existent file and set an environment variable + monkeypatch.setenv("CONFIG_PATH", "/path/that/does/not/exist.yaml") + monkeypatch.setenv("DEEPSEEK_MODEL_NAME", "deepseek-from-env") + + # Act + from app import config + importlib.reload(config) + + # Assert: The value from the environment variable should be used + assert config.settings.DEEPSEEK_MODEL_NAME == "deepseek-from-env" + +def test_hardcoded_default_is_used_last(monkeypatch): + """ + Tests that the hardcoded default is used when the YAML is missing and + the environment variable is not set. + """ + # Arrange + monkeypatch.setenv("CONFIG_PATH", "/path/that/does/not/exist.yaml") + monkeypatch.delenv("DEEPSEEK_MODEL_NAME", raising=False) + + # Act + from app import config + importlib.reload(config) + + # Assert: The hardcoded default value should be used + assert config.settings.DEEPSEEK_MODEL_NAME == "deepseek-chat" \ No newline at end of file diff --git a/ai-hub/app/app.py b/ai-hub/app/app.py index e357d57..69985ed 100644 --- a/ai-hub/app/app.py +++ b/ai-hub/app/app.py @@ -1,52 +1,63 @@ -import os -from contextlib import asynccontextmanager from fastapi import FastAPI +from contextlib import asynccontextmanager from typing import List -# Import core application logic +# Import centralized settings and other components +from app.config import settings from app.core.vector_store import FaissVectorStore from app.core.retrievers import FaissDBRetriever, Retriever from app.core.services import RAGService - -# Import the new files for database and API routes from app.db.session import create_db_and_tables from app.api.routes import create_api_router +from app.utils import print_config -# --- Application Factory Function --- +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Manages application startup and shutdown events. + - On startup, it creates database tables. + - On shutdown, it saves the FAISS index to disk. + """ + print("Application startup...") + print_config(settings) + create_db_and_tables() + yield + print("Application shutdown...") + # Access the vector_store from the application state to save it + if hasattr(app.state, 'vector_store'): + app.state.vector_store.save_index() + def create_app() -> FastAPI: """ Factory function to create and configure the FastAPI application. This encapsulates all setup logic, making the main entry point clean. """ - # Initialize core services for RAG - # CORRECTED: Now passing the required arguments to FaissVectorStore - vector_store = FaissVectorStore(index_file_path="data/faiss_index.bin", dimension=768) - retrievers: List[Retriever] = [ - FaissDBRetriever(vector_store=vector_store), - ] - rag_service = RAGService(vector_store=vector_store, retrievers=retrievers) - - @asynccontextmanager - async def lifespan(app: FastAPI): - """ - Initializes the database and vector store on startup and handles - cleanup on shutdown. - """ - print("Initializing application services...") - create_db_and_tables() - yield - print("Shutting down application services...") - vector_store.save_index() - app = FastAPI( - title="AI Model Hub Service", - description="A extensible hub to route requests to various LLMs with RAG capabilities.", - version="0.0.0", + # Use metadata from the central settings + title=settings.PROJECT_NAME, + version=settings.VERSION, + description="A modular API to route requests to various LLMs with RAG capabilities.", lifespan=lifespan ) - # Create and include the API router + # --- Initialize Core Services using settings --- + # Store services on the app.state object for easy access, e.g., in the lifespan manager. + app.state.vector_store = FaissVectorStore( + index_file_path=settings.FAISS_INDEX_PATH, + dimension=settings.EMBEDDING_DIMENSION + ) + + retrievers: List[Retriever] = [ + FaissDBRetriever(vector_store=app.state.vector_store), + ] + + rag_service = RAGService( + vector_store=app.state.vector_store, + retrievers=retrievers + ) + + # Create and include the API router, injecting the service api_router = create_api_router(rag_service=rag_service) app.include_router(api_router) - return app + return app \ No newline at end of file diff --git a/ai-hub/app/config.py b/ai-hub/app/config.py index ab8f8f4..454ce79 100644 --- a/ai-hub/app/config.py +++ b/ai-hub/app/config.py @@ -1,29 +1,82 @@ -# app/config.py - import os +import yaml from dotenv import load_dotenv -# Load environment variables from a .env file +# Load .env file at the module level load_dotenv() -class Settings: - # --- Database --- - DB_MODE: str = os.getenv("DB_MODE", "sqlite") - DATABASE_URL: str = ( - "sqlite:///./data/ai_hub.db" - if DB_MODE == "sqlite" - else os.getenv("DATABASE_URL", "postgresql://user:password@localhost/ai_hub_db") - ) - - # --- LLM API Keys & Models --- - DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") - GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") - DEEPSEEK_MODEL_NAME: str = os.getenv("DEEPSEEK_MODEL_NAME", "deepseek-chat") - GEMINI_MODEL_NAME: str = os.getenv("GEMINI_MODEL_NAME", "gemini-1.5-flash-latest") - - # --- Vector Store --- - FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "data/faiss_index.bin") - EMBEDDING_DIMENSION: int = int(os.getenv("EMBEDDING_DIMENSION", 768)) +def _load_yaml_config(path: str) -> dict: + """Loads a YAML config file if it exists, otherwise returns an empty dict.""" + if os.path.exists(path): + print(f"✅ Loading configuration from {path}") + with open(path, 'r') as f: + return yaml.safe_load(f) or {} + print(f"⚠️ '{path}' not found. Using environment variables and defaults.") + return {} -# Instantiate the settings so they can be imported and used anywhere +class Settings: + """ + Holds all application settings, calculated once on initialization. + Priority: YAML > Environment Variables > Defaults. + """ + def __init__(self): + # 1. Load the YAML configuration data + config_path = os.getenv("CONFIG_PATH", "config.yaml") + yaml_config = _load_yaml_config(config_path) + + # 2. Define a helper to safely get nested values from the loaded YAML + def get_from_yaml(keys): + d = yaml_config + for key in keys: + d = d.get(key) if isinstance(d, dict) else None + return d + + # 3. Set all attributes with the correct priority + # --- Application --- + self.PROJECT_NAME: str = get_from_yaml(["application", "project_name"]) or \ + os.getenv("PROJECT_NAME") or \ + "Cortex Hub" + self.VERSION: str = get_from_yaml(["application", "version"]) or "1.0.0" + + # --- Database --- + self.DB_MODE: str = get_from_yaml(["database", "mode"]) or \ + os.getenv("DB_MODE") or \ + "sqlite" + + if self.DB_MODE == "sqlite": + self.DATABASE_URL: str = "sqlite:///./data/ai_hub.db" + else: + self.DATABASE_URL: str = get_from_yaml(["database", "url"]) or \ + os.getenv("DATABASE_URL") or \ + "postgresql://user:password@localhost/ai_hub_db" + + # --- LLM API Keys & Models (Secrets should come from ENV) --- + self.DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY") + self.GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY") + + # Fail fast if required secrets are missing or empty + if not self.DEEPSEEK_API_KEY: + raise ValueError("DEEPSEEK_API_KEY is not set or is empty in the environment.") + if not self.GEMINI_API_KEY: + raise ValueError("GEMINI_API_KEY is not set or is empty in the environment.") + + self.DEEPSEEK_MODEL_NAME: str = get_from_yaml(["llm_providers", "deepseek_model_name"]) or \ + os.getenv("DEEPSEEK_MODEL_NAME") or \ + "deepseek-chat" + self.GEMINI_MODEL_NAME: str = get_from_yaml(["llm_providers", "gemini_model_name"]) or \ + os.getenv("GEMINI_MODEL_NAME") or \ + "gemini-1.5-flash-latest" + + # --- Vector Store --- + self.FAISS_INDEX_PATH: str = get_from_yaml(["vector_store", "index_path"]) or \ + os.getenv("FAISS_INDEX_PATH") or \ + "data/faiss_index.bin" + dimension_str = get_from_yaml(["vector_store", "embedding_dimension"]) or \ + os.getenv("EMBEDDING_DIMENSION") or \ + "768" + self.EMBEDDING_DIMENSION: int = int(dimension_str) + + +# Instantiate a single, importable settings object for the entire application. +# The __init__ method runs once, and all values are now set. settings = Settings() \ No newline at end of file diff --git a/ai-hub/app/config.yaml b/ai-hub/app/config.yaml new file mode 100644 index 0000000..c03d2da --- /dev/null +++ b/ai-hub/app/config.yaml @@ -0,0 +1,25 @@ +# Default application configuration for Cortex Hub +application: + project_name: "Cortex Hub - AI Model Service" + version: "1.0.0" + +database: + # The mode can be 'sqlite' or 'postgres'. + # This can be overridden by the DB_MODE environment variable. + mode: "sqlite" + + # The connection string for the database. + # This can be overridden by the DATABASE_URL environment variable. + url: "sqlite:///./data/ai_hub.db" + +llm_providers: + # Default model names for the LLM providers. + # These can be overridden by environment variables like DEEPSEEK_MODEL_NAME. + deepseek_model_name: "deepseek-chat" + gemini_model_name: "gemini-1.5-flash-latest" + +vector_store: + # Path to the FAISS index file. + index_path: "data/faiss_index.bin" + # The dimension of the sentence embeddings. + embedding_dimension: 768 \ No newline at end of file diff --git a/ai-hub/app/utils.py b/ai-hub/app/utils.py new file mode 100644 index 0000000..a61449c --- /dev/null +++ b/ai-hub/app/utils.py @@ -0,0 +1,32 @@ +import os + +def print_config(settings_obj): + """ + Prints the application's configuration to the console, masking sensitive values. + """ + print("--- ⚙️ Application Configuration ---") + + sensitive_keywords = ["KEY", "TOKEN", "SECRET", "PASSWORD"] + + # Use sorted(dir(...)) to ensure a consistent output order for tests + for key in sorted(dir(settings_obj)): + # Skip private/dunder methods + if key.startswith('_'): + continue + + value = getattr(settings_obj, key) + + # **CRITICAL FIX**: Skip attributes that are methods/functions + if callable(value): + continue + + if any(k in key for k in sensitive_keywords): + if isinstance(value, str) and len(value) > 8: + masked_value = f"{value[:4]}...{value[-4:]}" + else: + masked_value = "***" + print(f" - {key}: {masked_value}") + else: + print(f" - {key}: {value}") + + print("------------------------------------") \ No newline at end of file diff --git a/ai-hub/tests/test_config.py b/ai-hub/tests/test_config.py new file mode 100644 index 0000000..ae6a642 --- /dev/null +++ b/ai-hub/tests/test_config.py @@ -0,0 +1,62 @@ +import pytest +import importlib +import yaml + +@pytest.fixture +def tmp_config_file(tmp_path): + """Creates a temporary config.yaml file and returns its path.""" + config_content = { + "application": {"project_name": "Test Project from YAML"}, + "llm_providers": {"deepseek_model_name": "deepseek-from-yaml"} + } + config_path = tmp_path / "test_config.yaml" + with open(config_path, 'w') as f: + yaml.dump(config_content, f) + return str(config_path) + +def test_yaml_overrides_env_var(monkeypatch, tmp_config_file): + """ + Tests that a value from the YAML file has priority over an environment variable. + """ + # Arrange: Set BOTH a YAML value and an environment variable + monkeypatch.setenv("CONFIG_PATH", tmp_config_file) + monkeypatch.setenv("DEEPSEEK_MODEL_NAME", "deepseek-from-env") # This should be ignored + + # Act + from app import config + importlib.reload(config) + + # Assert: The value from the YAML file should be used + assert config.settings.DEEPSEEK_MODEL_NAME == "deepseek-from-yaml" + +def test_env_var_overrides_default(monkeypatch): + """ + Tests that an environment variable overrides the hardcoded default when + the YAML file is missing or doesn't contain the key. + """ + # Arrange: Point to a non-existent file and set an environment variable + monkeypatch.setenv("CONFIG_PATH", "/path/that/does/not/exist.yaml") + monkeypatch.setenv("DEEPSEEK_MODEL_NAME", "deepseek-from-env") + + # Act + from app import config + importlib.reload(config) + + # Assert: The value from the environment variable should be used + assert config.settings.DEEPSEEK_MODEL_NAME == "deepseek-from-env" + +def test_hardcoded_default_is_used_last(monkeypatch): + """ + Tests that the hardcoded default is used when the YAML is missing and + the environment variable is not set. + """ + # Arrange + monkeypatch.setenv("CONFIG_PATH", "/path/that/does/not/exist.yaml") + monkeypatch.delenv("DEEPSEEK_MODEL_NAME", raising=False) + + # Act + from app import config + importlib.reload(config) + + # Assert: The hardcoded default value should be used + assert config.settings.DEEPSEEK_MODEL_NAME == "deepseek-chat" \ No newline at end of file diff --git a/ai-hub/tests/test_utils.py b/ai-hub/tests/test_utils.py new file mode 100644 index 0000000..d4b72f1 --- /dev/null +++ b/ai-hub/tests/test_utils.py @@ -0,0 +1,64 @@ +import pytest +from app.utils import print_config + +# --- Test Suite for app/utils.py --- + +class MockSettings: + def __init__(self): + self.PROJECT_NAME = "Test Project" + self.DEEPSEEK_API_KEY = "deepseek_abcdef123456" + self.GEMINI_SECRET = "gemini_ghjklm789012" + self.DB_PASSWORD = "short" + self.VERSION = "1.0.0" + +def test_print_config_masks_sensitive_data(capsys): + """ + Tests that print_config correctly identifies and masks values + with sensitive keywords in their names (KEY, SECRET, PASSWORD). + """ + # Arrange + mock_settings = MockSettings() + + # Act + print_config(mock_settings) + captured = capsys.readouterr() + + # Assert + assert "PROJECT_NAME: Test Project" in captured.out + assert "VERSION: 1.0.0" in captured.out + + # **CRITICAL FIX**: Correct the typo in the expected masked value + assert "DEEPSEEK_API_KEY: deep...3456" in captured.out + assert "GEMINI_SECRET: gemi...9012" in captured.out + assert "DB_PASSWORD: ***" in captured.out + +def test_print_config_handles_various_attributes(capsys): + """ + Tests that the function handles attributes of different types and + avoids printing private or special attributes. + """ + # Arrange + class ComplexMockSettings: + def __init__(self): + self.PUBLIC_SETTING = "Visible" + self.ANOTHER_KEY = "long_and_secure_key_123" + self.EMPTY_SECRET = "" + self.some_random_method = lambda: "hello" + self._private_attr = "should not be visible" + self.__super_private = "also not visible" + + mock_settings = ComplexMockSettings() + + # Act + print_config(mock_settings) + captured = capsys.readouterr() + + # Assert + assert "PUBLIC_SETTING: Visible" in captured.out + assert "ANOTHER_KEY: long..._123" in captured.out + assert "EMPTY_SECRET: ***" in captured.out + + # This assertion will now pass because the updated print_config skips methods + assert "_private_attr" not in captured.out + assert "some_random_method" not in captured.out + assert "__super_private" not in captured.out \ No newline at end of file