# cortex-hub / ai-hub / app / config.py
import os
import yaml
from enum import Enum
from typing import Optional
from dotenv import load_dotenv
from pydantic import BaseModel, Field, SecretStr

# Load environment variables from a local .env file into the process environment.
# This runs at import time, ahead of the Settings() instantiation at the bottom of
# this module, so the os.getenv() lookups performed there can see those values.
load_dotenv()

# --- 1. Define the Configuration Schema ---

class EmbeddingProvider(str, Enum):
    """Identifiers of the embedding backends selectable in configuration.

    The ``str`` mix-in makes each member compare equal to its plain string
    value, which is how configuration strings are converted into members
    (``EmbeddingProvider("mock")``).
    """
    GOOGLE_GENAI = "google_genai"
    # NOTE(review): presumably a stand-in backend for tests/offline use — confirm with the consumer.
    MOCK = "mock"

class TTSProvider(str, Enum):
    """Identifiers of the Text-to-Speech (TTS) backends selectable in configuration.

    The ``str`` mix-in makes each member compare equal to its plain string
    value. Only one backend is currently declared.
    """
    GOOGLE_GENAI = "google_genai"

class ApplicationSettings(BaseModel):
    """General application metadata (YAML section ``application``)."""
    # Settings lets the PROJECT_NAME environment variable override this value.
    project_name: str = "Cortex Hub"
    # Settings reads the version from this model only; it has no environment override.
    version: str = "1.0.0"
    # Settings lets the LOG_LEVEL environment variable override this value.
    log_level: str = "INFO"

class DatabaseSettings(BaseModel):
    """Database connection options (YAML section ``database``)."""
    # Backend selector: Settings builds a SQLite URL from local_path only when
    # this equals "sqlite"; any other value makes it use url instead.
    mode: str = "sqlite"
    # Connection URL consulted by Settings when mode is not "sqlite".
    url: Optional[str] = None
    # Path of the SQLite database file used when mode is "sqlite".
    local_path: str = "data/ai_hub.db"

class LLMProviderSettings(BaseModel):
    """Default model names per LLM provider (YAML section ``llm_providers``)."""
    # Settings lets the DEEPSEEK_MODEL_NAME environment variable override this value.
    deepseek_model_name: str = "deepseek-chat"
    # Settings lets the GEMINI_MODEL_NAME environment variable override this value.
    gemini_model_name: str = "gemini-1.5-flash-latest"

class EmbeddingProviderSettings(BaseModel):
    """Embedding backend selection (YAML section ``embedding_provider``)."""
    # Which backend to use; validated against the EmbeddingProvider enum.
    provider: EmbeddingProvider = Field(default=EmbeddingProvider.GOOGLE_GENAI)
    # Model identifier handed to the selected backend.
    model_name: str = "models/text-embedding-004"
    # Optional key for the backend. Note that Settings reads the key from the
    # raw YAML data rather than from this field, and falls back to
    # GEMINI_API_KEY when neither EMBEDDING_API_KEY nor the YAML key is set.
    api_key: Optional[SecretStr] = None

class TTSProviderSettings(BaseModel):
    """Text-to-Speech backend selection (YAML section ``tts_provider``)."""
    # Which backend to use; validated against the TTSProvider enum.
    provider: TTSProvider = Field(default=TTSProvider.GOOGLE_GENAI)
    # NOTE(review): presumably a voice identifier defined by the TTS backend — confirm.
    voice_name: str = "Kore"
    # Model identifier handed to the selected backend.
    model_name: str = "gemini-2.5-flash-preview-tts"
    # Optional key for the backend. Note that Settings reads the key from the
    # raw YAML data rather than from this field, and falls back to
    # GEMINI_API_KEY when neither TTS_API_KEY nor the YAML key is set.
    api_key: Optional[SecretStr] = None

class VectorStoreSettings(BaseModel):
    """Vector index options (YAML section ``vector_store``)."""
    # Location of the index file; Settings exposes it as FAISS_INDEX_PATH.
    index_path: str = "data/faiss_index.bin"
    # Settings converts the resolved value with int() and exposes it as EMBEDDING_DIMENSION.
    # NOTE(review): presumably must match the output size of the embedding model — confirm.
    embedding_dimension: int = 768

class AppConfig(BaseModel):
    """Top-level Pydantic model for application configuration.

    Each attribute corresponds to one top-level section of the YAML file.
    Every section uses ``default_factory``, so a section that is missing from
    the input is filled with a fresh instance holding that section's defaults.
    """
    application: ApplicationSettings = Field(default_factory=ApplicationSettings)
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)
    llm_providers: LLMProviderSettings = Field(default_factory=LLMProviderSettings)
    vector_store: VectorStoreSettings = Field(default_factory=VectorStoreSettings)
    embedding_provider: EmbeddingProviderSettings = Field(default_factory=EmbeddingProviderSettings)
    tts_provider: TTSProviderSettings = Field(default_factory=TTSProviderSettings)


# --- 2. Create the Final Settings Object ---
class Settings:
    """
    Holds all application settings, validated and structured by Pydantic.
    Priority Order: Environment Variables > YAML File > Pydantic Defaults

    The YAML file is located through the ``CONFIG_PATH`` environment variable
    (default ``config.yaml``). Each resolved value is exposed as an upper-case
    instance attribute (``PROJECT_NAME``, ``DATABASE_URL``, ...).

    A falsy value at one layer (for example an empty environment variable) is
    treated as "not set" and the next layer is consulted.

    Raises:
        ValueError: if a resolved provider name is not a member of its enum,
            or the resolved embedding dimension cannot be converted to ``int``.
        Whatever ``AppConfig.parse_obj`` raises when the YAML content does not
        satisfy the schema.
    """

    # Connection string used whenever no usable database location is configured.
    _DEFAULT_SQLITE_URL = "sqlite:///./data/ai_hub.db"

    @staticmethod
    def _load_yaml(config_path):
        """Return the parsed YAML document at *config_path*, or ``{}`` when the file is missing or empty."""
        if not os.path.exists(config_path):
            print(f"⚠️ '{config_path}' not found. Using defaults and environment variables.")
            return {}
        print(f"✅ Loading configuration from {config_path}")
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(config_path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f) or {}

    @staticmethod
    def _get_nested(data, keys):
        """Walk *keys* through nested dicts; return ``None`` as soon as a level is not a dict or lacks the key."""
        node = data
        for key in keys:
            node = node.get(key) if isinstance(node, dict) else None
        return node

    @classmethod
    def _build_sqlite_url(cls, local_path):
        """Turn a SQLite file path into a SQLAlchemy-style ``sqlite:///`` URL.

        Only literal leading ``./`` prefixes are removed. (``str.lstrip("./")``
        would strip any run of dots and slashes, corrupting ``../x``,
        ``.hidden/x`` and absolute paths.) Absolute paths are kept absolute;
        an empty path yields the default URL.
        """
        path = local_path or ""
        while path.startswith("./"):
            path = path[2:]
        if path in ("", "."):
            return cls._DEFAULT_SQLITE_URL
        if os.path.isabs(path):
            # An absolute POSIX path produces the four-slash form, e.g. sqlite:////var/db/x.db
            return f"sqlite:///{path}"
        return f"sqlite:///./{path}"

    def __init__(self):
        yaml_data = self._load_yaml(os.getenv("CONFIG_PATH", "config.yaml"))

        # Validates the YAML content and supplies the schema defaults.
        # NOTE(review): parse_obj is the Pydantic v1 spelling (deprecated in v2
        # in favour of model_validate) — confirm against the pinned version.
        schema = AppConfig.parse_obj(yaml_data)

        def resolve(env_name, yaml_keys, fallback):
            # Environment variable > raw YAML value > fallback.
            return os.getenv(env_name) or self._get_nested(yaml_data, yaml_keys) or fallback

        def resolve_provider(env_name, yaml_keys, fallback):
            # Same priority as resolve(), but the environment value is
            # lower-cased so that e.g. "MOCK" matches the enum value "mock".
            env_value = (os.getenv(env_name) or "").lower()
            return env_value or self._get_nested(yaml_data, yaml_keys) or fallback

        # --- Application Settings ---
        self.PROJECT_NAME: str = resolve(
            "PROJECT_NAME", ["application", "project_name"], schema.application.project_name
        )
        # The version is not overridable through the environment.
        self.VERSION: str = schema.application.version
        self.LOG_LEVEL: str = resolve(
            "LOG_LEVEL", ["application", "log_level"], schema.application.log_level
        )

        # --- Database Settings ---
        self.DB_MODE: str = resolve("DB_MODE", ["database", "mode"], schema.database.mode)
        local_db_path = resolve("LOCAL_DB_PATH", ["database", "local_path"], schema.database.local_path)
        external_db_url = resolve("DATABASE_URL", ["database", "url"], schema.database.url)

        if self.DB_MODE == "sqlite":
            self.DATABASE_URL: str = self._build_sqlite_url(local_db_path)
        else:
            # Without an external URL, fall back to the default local SQLite file.
            self.DATABASE_URL = external_db_url or self._DEFAULT_SQLITE_URL

        # --- API Keys & Models ---
        self.DEEPSEEK_API_KEY: Optional[str] = os.getenv("DEEPSEEK_API_KEY")
        self.GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")

        self.DEEPSEEK_MODEL_NAME: str = resolve(
            "DEEPSEEK_MODEL_NAME",
            ["llm_providers", "deepseek_model_name"],
            schema.llm_providers.deepseek_model_name,
        )
        self.GEMINI_MODEL_NAME: str = resolve(
            "GEMINI_MODEL_NAME",
            ["llm_providers", "gemini_model_name"],
            schema.llm_providers.gemini_model_name,
        )

        # --- Vector Store Settings ---
        self.FAISS_INDEX_PATH: str = resolve(
            "FAISS_INDEX_PATH", ["vector_store", "index_path"], schema.vector_store.index_path
        )
        # The environment delivers a string, YAML/defaults an int; normalise to int.
        self.EMBEDDING_DIMENSION: int = int(
            resolve(
                "EMBEDDING_DIMENSION",
                ["vector_store", "embedding_dimension"],
                schema.vector_store.embedding_dimension,
            )
        )

        # --- Embedding Provider Settings ---
        self.EMBEDDING_PROVIDER: EmbeddingProvider = EmbeddingProvider(
            resolve_provider(
                "EMBEDDING_PROVIDER",
                ["embedding_provider", "provider"],
                schema.embedding_provider.provider,
            )
        )
        self.EMBEDDING_MODEL_NAME: str = resolve(
            "EMBEDDING_MODEL_NAME",
            ["embedding_provider", "model_name"],
            schema.embedding_provider.model_name,
        )
        # EMBEDDING_API_KEY from env, then YAML, then fall back to GEMINI_API_KEY.
        self.EMBEDDING_API_KEY: Optional[str] = resolve(
            "EMBEDDING_API_KEY", ["embedding_provider", "api_key"], self.GEMINI_API_KEY
        )

        # --- TTS Provider Settings ---
        self.TTS_PROVIDER: TTSProvider = TTSProvider(
            resolve_provider(
                "TTS_PROVIDER", ["tts_provider", "provider"], schema.tts_provider.provider
            )
        )
        self.TTS_VOICE_NAME: str = resolve(
            "TTS_VOICE_NAME", ["tts_provider", "voice_name"], schema.tts_provider.voice_name
        )
        self.TTS_MODEL_NAME: str = resolve(
            "TTS_MODEL_NAME", ["tts_provider", "model_name"], schema.tts_provider.model_name
        )
        # TTS_API_KEY from env, then YAML, then fall back to GEMINI_API_KEY.
        self.TTS_API_KEY: Optional[str] = resolve(
            "TTS_API_KEY", ["tts_provider", "api_key"], self.GEMINI_API_KEY
        )
        
# Instantiate the single settings object for the application.
# NOTE: this executes at import time, so importing this module reads the
# environment, looks for the YAML file and prints the loader's status line.
settings = Settings()