import os
import yaml
from dotenv import load_dotenv
from pydantic import BaseModel, Field
# Populate the process environment from a local .env file (python-dotenv)
# before any os.getenv() lookup below runs.
load_dotenv()
# --- 1. Define the Configuration Schema ---
class ApplicationSettings(BaseModel):
    """Schema for the ``application`` section of the configuration."""

    # Name of the project; used when the section omits it.
    project_name: str = "Cortex Hub"
    # Version string of the application.
    version: str = "1.0.0"
class DatabaseSettings(BaseModel):
    """Schema for the ``database`` section of the configuration."""

    # Database backend selector; defaults to "sqlite".
    mode: str = "sqlite"
    # Connection URL used as the fallback default for the database.
    url: str = "postgresql://user:password@localhost/ai_hub_db"
class LLMProviderSettings(BaseModel):
    """Schema for the ``llm_providers`` section of the configuration."""

    # Default model identifier for the DeepSeek provider.
    deepseek_model_name: str = "deepseek-chat"
    # Default model identifier for the Gemini provider.
    gemini_model_name: str = "gemini-1.5-flash-latest"
class VectorStoreSettings(BaseModel):
    """Schema for the ``vector_store`` section of the configuration."""

    # Path of the FAISS index file.
    index_path: str = "data/faiss_index.bin"
    # Dimension of the embedding vectors.
    embedding_dimension: int = 768
class AppConfig(BaseModel):
    """
    Root configuration schema, one field per configuration section.

    Every section uses a ``default_factory``, so a section that is absent
    from the parsed data is filled with that section model's defaults.
    """

    application: ApplicationSettings = Field(default_factory=ApplicationSettings)
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)
    llm_providers: LLMProviderSettings = Field(default_factory=LLMProviderSettings)
    vector_store: VectorStoreSettings = Field(default_factory=VectorStoreSettings)
# --- 2. Create the Final Settings Object ---
class Settings:
    """
    Holds all application settings, validated and structured by Pydantic.

    Priority Order: YAML File > Environment Variables > Pydantic Defaults

    A source is skipped when its value is missing *or falsy* (``None``,
    ``""``, ``0``), in which case the next source in the chain is used.

    Exceptions to the three-level chain:
      * ``VERSION`` is taken from the validated config only (YAML value or
        Pydantic default); no environment variable is consulted.
      * ``DATABASE_URL`` is fixed to the local SQLite file when ``DB_MODE``
        is exactly ``"sqlite"``.
      * ``DEEPSEEK_API_KEY`` and ``GEMINI_API_KEY`` are read from the
        environment only.

    The YAML file path comes from the ``CONFIG_PATH`` environment variable
    and defaults to ``config.yaml``.

    Raises:
        ValueError: if either API key is unset or empty, or if the resolved
            embedding dimension cannot be converted with ``int()``.
    """

    def __init__(self):
        config_path = os.getenv("CONFIG_PATH", "config.yaml")
        yaml_data = self._load_yaml(config_path)

        # Validate the YAML content and fill in defaults for missing sections.
        # NOTE(review): parse_obj is the Pydantic v1 spelling; Pydantic v2 keeps
        # it but deprecates it in favour of model_validate -- confirm which
        # major version this project pins before changing it.
        config_from_pydantic = AppConfig.parse_obj(yaml_data)

        def pick(keys, env_name, default):
            """Return the first truthy value of: raw YAML, env var, default."""
            return self._dig(yaml_data, keys) or os.getenv(env_name) or default

        app_defaults = config_from_pydantic.application
        db_defaults = config_from_pydantic.database
        llm_defaults = config_from_pydantic.llm_providers
        vector_defaults = config_from_pydantic.vector_store

        # --- Application ---
        self.PROJECT_NAME: str = pick(
            ["application", "project_name"], "PROJECT_NAME", app_defaults.project_name
        )
        self.VERSION: str = app_defaults.version

        # --- Database ---
        self.DB_MODE: str = pick(["database", "mode"], "DB_MODE", db_defaults.mode)
        if self.DB_MODE == "sqlite":
            # SQLite mode always uses the bundled local file; any configured
            # URL is ignored.
            database_url = "sqlite:///./data/ai_hub.db"
        else:
            database_url = pick(["database", "url"], "DATABASE_URL", db_defaults.url)
        self.DATABASE_URL: str = database_url

        # --- Secrets (environment only) ---
        self.DEEPSEEK_API_KEY: str = os.getenv("DEEPSEEK_API_KEY")
        self.GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY")
        if not self.DEEPSEEK_API_KEY or not self.GEMINI_API_KEY:
            raise ValueError("API keys must be set in the environment.")

        # --- LLM providers ---
        self.DEEPSEEK_MODEL_NAME: str = pick(
            ["llm_providers", "deepseek_model_name"],
            "DEEPSEEK_MODEL_NAME",
            llm_defaults.deepseek_model_name,
        )
        self.GEMINI_MODEL_NAME: str = pick(
            ["llm_providers", "gemini_model_name"],
            "GEMINI_MODEL_NAME",
            llm_defaults.gemini_model_name,
        )

        # --- Vector store ---
        self.FAISS_INDEX_PATH: str = pick(
            ["vector_store", "index_path"], "FAISS_INDEX_PATH", vector_defaults.index_path
        )
        # The value may arrive as a string (environment) or an int (YAML or
        # default), so it is normalised with int().
        self.EMBEDDING_DIMENSION: int = int(
            pick(
                ["vector_store", "embedding_dimension"],
                "EMBEDDING_DIMENSION",
                vector_defaults.embedding_dimension,
            )
        )

    @staticmethod
    def _load_yaml(config_path):
        """Return the parsed YAML at *config_path*, or ``{}`` if absent or empty."""
        if not os.path.exists(config_path):
            print(f"⚠️ '{config_path}' not found. Using defaults and environment variables.")
            return {}
        print(f"✅ Loading configuration from {config_path}")
        # Decode explicitly as UTF-8: without encoding= the platform locale
        # decides, which can break non-ASCII config values.
        with open(config_path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f) or {}

    @staticmethod
    def _dig(data, keys):
        """Follow *keys* through nested dicts; ``None`` once the path breaks."""
        node = data
        for key in keys:
            node = node.get(key) if isinstance(node, dict) else None
        return node
# Instantiate the single settings object for the application.
# This runs Settings.__init__ at import time: importing this module reads the
# YAML config (if present) and raises ValueError when the API keys are not set.
settings = Settings()