import os
import yaml
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# Populate os.environ from a local .env file before any setting is read.
load_dotenv()


# --- 1. Configuration schema ---
# The field defaults below are the last-resort values used when neither the
# YAML file nor the environment supplies a setting.

class ApplicationSettings(BaseModel):
    """Schema for the ``application`` section of the YAML config."""
    project_name: str = "Cortex Hub"
    version: str = "1.0.0"


class DatabaseSettings(BaseModel):
    """Schema for the ``database`` section of the YAML config."""
    mode: str = "sqlite"
    url: str = "postgresql://user:password@localhost/ai_hub_db"


class LLMProviderSettings(BaseModel):
    """Schema for the ``llm_providers`` section of the YAML config."""
    deepseek_model_name: str = "deepseek-chat"
    gemini_model_name: str = "gemini-1.5-flash-latest"


class VectorStoreSettings(BaseModel):
    """Schema for the ``vector_store`` section of the YAML config."""
    index_path: str = "data/faiss_index.bin"
    embedding_dimension: int = 768


class AppConfig(BaseModel):
    """Root schema; every section falls back to its own defaults when absent."""
    application: ApplicationSettings = Field(default_factory=ApplicationSettings)
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)
    llm_providers: LLMProviderSettings = Field(default_factory=LLMProviderSettings)
    vector_store: VectorStoreSettings = Field(default_factory=VectorStoreSettings)


# --- 2. Final settings object ---

class Settings:
    """
    Holds all application settings, validated and structured by Pydantic.
    Priority Order: YAML File > Environment Variables > Pydantic Defaults

    A YAML or environment value that is falsy (missing, empty string, 0) is
    treated as "not set" and the next source in the priority order is used.
    VERSION is the exception: it is taken from the validated YAML/default
    only and has no environment override.

    Raises:
        ValueError: if DEEPSEEK_API_KEY or GEMINI_API_KEY is missing or empty
            in the environment, or if EMBEDDING_DIMENSION cannot be converted
            to an int.
    """

    def __init__(self):
        # Load base configuration from YAML (path overridable via CONFIG_PATH).
        config_path = os.getenv("CONFIG_PATH", "config.yaml")
        yaml_data = {}
        if os.path.exists(config_path):
            print(f"✅ Loading configuration from {config_path}")
            # Explicit UTF-8: do not depend on the platform default encoding.
            with open(config_path, 'r', encoding="utf-8") as f:
                yaml_data = yaml.safe_load(f) or {}
        else:
            print(f"⚠️ '{config_path}' not found. Using defaults and environment variables.")

        # Validate the loaded data and fill in defaults for missing sections.
        # `model_validate` is the Pydantic v2 name; `parse_obj` is the v1 name
        # (deprecated in v2), so prefer the former when it exists.
        validate = getattr(AppConfig, "model_validate", None) or AppConfig.parse_obj
        config_from_pydantic = validate(yaml_data)

        def get_from_yaml(keys):
            """Walk the raw YAML dict along `keys`; None if any level is absent."""
            d = yaml_data
            for key in keys:
                d = d.get(key) if isinstance(d, dict) else None
            return d

        def resolve(keys, env_name, default):
            """Return the first truthy value of: YAML, environment, default."""
            return get_from_yaml(keys) or os.getenv(env_name) or default

        # --- Application ---
        self.PROJECT_NAME: str = resolve(
            ["application", "project_name"],
            "PROJECT_NAME",
            config_from_pydantic.application.project_name,
        )
        self.VERSION: str = config_from_pydantic.application.version

        # --- Database ---
        self.DB_MODE: str = resolve(
            ["database", "mode"],
            "DB_MODE",
            config_from_pydantic.database.mode,
        )
        if self.DB_MODE == "sqlite":
            # In sqlite mode the URL is fixed; any configured URL is ignored.
            self.DATABASE_URL: str = "sqlite:///./data/ai_hub.db"
        else:
            self.DATABASE_URL: str = resolve(
                ["database", "url"],
                "DATABASE_URL",
                config_from_pydantic.database.url,
            )

        # --- LLM API keys: read from the environment only, never from YAML ---
        self.DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY")
        self.GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY")
        # Fail fast, and say which variable is missing so the error is actionable.
        missing = [
            name
            for name in ("DEEPSEEK_API_KEY", "GEMINI_API_KEY")
            if not getattr(self, name)
        ]
        if missing:
            raise ValueError(
                "API keys must be set in the environment. Missing: "
                + ", ".join(missing)
            )

        # --- LLM model names ---
        self.DEEPSEEK_MODEL_NAME: str = resolve(
            ["llm_providers", "deepseek_model_name"],
            "DEEPSEEK_MODEL_NAME",
            config_from_pydantic.llm_providers.deepseek_model_name,
        )
        self.GEMINI_MODEL_NAME: str = resolve(
            ["llm_providers", "gemini_model_name"],
            "GEMINI_MODEL_NAME",
            config_from_pydantic.llm_providers.gemini_model_name,
        )

        # --- Vector store ---
        self.FAISS_INDEX_PATH: str = resolve(
            ["vector_store", "index_path"],
            "FAISS_INDEX_PATH",
            config_from_pydantic.vector_store.index_path,
        )
        # The environment yields a string while YAML/defaults yield an int,
        # so normalise through int() whatever the source was.
        dimension_str = resolve(
            ["vector_store", "embedding_dimension"],
            "EMBEDDING_DIMENSION",
            config_from_pydantic.vector_store.embedding_dimension,
        )
        self.EMBEDDING_DIMENSION: int = int(dimension_str)


# Instantiate the single settings object for the application
settings = Settings()