diff --git a/ai-hub/app/config.py b/ai-hub/app/config.py
index 2d79f0e..ac5cbb5 100644
--- a/ai-hub/app/config.py
+++ b/ai-hub/app/config.py
@@ -18,6 +18,13 @@
     GOOGLE_GENAI = "google_genai"
     MOCK = "mock"
 
+# New Enum for supported TTS providers
+class TTSProvider(str, Enum):
+    """
+    An enum to represent the supported Text-to-Speech (TTS) providers.
+    """
+    GOOGLE_GENAI = "google_genai"
+
 class ApplicationSettings(BaseModel):
     project_name: str = "Cortex Hub"
     version: str = "1.0.0"
@@ -39,6 +46,12 @@
     model_name: str = "models/text-embedding-004"
     api_key: Optional[SecretStr] = None
 
+# New settings class for TTS providers
+class TTSProviderSettings(BaseModel):
+    provider: TTSProvider = Field(default=TTSProvider.GOOGLE_GENAI)
+    voice_name: str = "Kore"
+    api_key: Optional[SecretStr] = None
+
 class VectorStoreSettings(BaseModel):
     index_path: str = "data/faiss_index.bin"
     embedding_dimension: int = 768
@@ -49,6 +62,8 @@
     llm_providers: LLMProviderSettings = Field(default_factory=LLMProviderSettings)
     vector_store: VectorStoreSettings = Field(default_factory=VectorStoreSettings)
     embedding_provider: EmbeddingProviderSettings = Field(default_factory=EmbeddingProviderSettings)
+    # Add the new TTS provider settings to the main config
+    tts_provider: TTSProviderSettings = Field(default_factory=TTSProviderSettings)
 
 # --- 2. Create the Final Settings Object ---
@@ -81,8 +96,8 @@
         self.VERSION: str = config_from_pydantic.application.version
 
         self.LOG_LEVEL: str = os.getenv("LOG_LEVEL") or \
-            get_from_yaml(["application", "log_level"]) or \
-            config_from_pydantic.application.log_level
+            get_from_yaml(["application", "log_level"]) or \
+            config_from_pydantic.application.log_level
 
         # --- Database Settings ---
         self.DB_MODE: str = os.getenv("DB_MODE") or \
@@ -115,8 +130,8 @@
             config_from_pydantic.llm_providers.deepseek_model_name
 
         self.GEMINI_MODEL_NAME: str = os.getenv("GEMINI_MODEL_NAME") or \
-            get_from_yaml(["llm_providers", "gemini_model_name"]) or \
-            config_from_pydantic.llm_providers.gemini_model_name
+            get_from_yaml(["llm_providers", "gemini_model_name"]) or \
+            config_from_pydantic.llm_providers.gemini_model_name
 
         self.FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH") or \
             get_from_yaml(["vector_store", "index_path"]) or \
@@ -146,6 +161,24 @@
 
         self.EMBEDDING_API_KEY: Optional[str] = api_key_env or api_key_yaml or api_key_pydantic
 
+        # --- New TTS Provider Settings ---
+        tts_provider_env = os.getenv("TTS_PROVIDER")
+        if tts_provider_env:
+            tts_provider_env = tts_provider_env.lower()
+        self.TTS_PROVIDER: TTSProvider = TTSProvider(tts_provider_env or \
+            get_from_yaml(["tts_provider", "provider"]) or \
+            config_from_pydantic.tts_provider.provider)
+
+        self.TTS_VOICE_NAME: str = os.getenv("TTS_VOICE_NAME") or \
+            get_from_yaml(["tts_provider", "voice_name"]) or \
+            config_from_pydantic.tts_provider.voice_name
+
+        tts_api_key_env = os.getenv("TTS_API_KEY")
+        tts_api_key_yaml = get_from_yaml(["tts_provider", "api_key"])
+        tts_api_key_pydantic = config_from_pydantic.tts_provider.api_key.get_secret_value() if config_from_pydantic.tts_provider.api_key else None
+
+        self.TTS_API_KEY: Optional[str] = tts_api_key_env or tts_api_key_yaml or tts_api_key_pydantic
+
 # Instantiate the single settings object for the application
 settings = Settings()
diff --git a/ai-hub/app/config.yaml b/ai-hub/app/config.yaml
index 11b1c0a..2f0c0aa 100644
--- a/ai-hub/app/config.yaml
+++ b/ai-hub/app/config.yaml
@@ -31,3 +31,9 @@
   provider: "google_genai"
   # The model name for the embedding service.
   model_name: "gemini-embedding-001"
+
+tts_provider:
+  # The provider for the TTS service.
+  provider: "google_genai"
+  # The name of the voice to use for TTS.
+  voice_name: "Kore"
\ No newline at end of file
diff --git a/ai-hub/app/core/providers/factory.py b/ai-hub/app/core/providers/factory.py
index 5912909..7cf5119 100644
--- a/ai-hub/app/core/providers/factory.py
+++ b/ai-hub/app/core/providers/factory.py
@@ -1,5 +1,6 @@
 from app.config import settings
-from .base import LLMProvider
+from .base import LLMProvider, TTSProvider
 from .llm.deepseek import DeepSeekProvider
 from .llm.gemini import GeminiProvider
+from .tts.gemini import GeminiTTSProvider
 from openai import AsyncOpenAI
@@ -20,4 +21,10 @@
     provider = _llm_providers.get(model_name)
     if not provider:
         raise ValueError(f"Unsupported model provider: '{model_name}'. Supported providers are: {list(_llm_providers.keys())}")
-    return provider
\ No newline at end of file
+    return provider
+
+def get_tts_provider(provider_name: str, api_key: str) -> TTSProvider:
+    if provider_name in ("google_genai", "gemini"):
+        return GeminiTTSProvider(api_key=api_key)
+    # Add other TTS providers here if needed
+    raise ValueError(f"Unknown TTS provider: {provider_name}")
\ No newline at end of file
diff --git a/ai-hub/app/core/providers/tts/gemini.py b/ai-hub/app/core/providers/tts/gemini.py
new file mode 100644
index 0000000..98b191f
--- /dev/null
+++ b/ai-hub/app/core/providers/tts/gemini.py
@@ -0,0 +1,60 @@
+import os
+import aiohttp
+import asyncio
+import base64
+from typing import AsyncGenerator
+from app.core.providers.base import TTSProvider
+
+# New concrete class for Gemini TTS with the corrected voice list
+class GeminiTTSProvider(TTSProvider):
+    # Class attribute with the corrected list of available voices
+    AVAILABLE_VOICES = [
+        "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda",
+        "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
+        "Iapetus", "Umbriel", "Algieba", "Despina", "Erinome",
+        "Algenib", "Rasalgethi", "Laomedeia", "Achernar", "Alnilam",
+        "Schedar", "Gacrux", "Pulcherrima", "Achird", "Zubenelgenubi",
+        "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat"
+    ]
+
+    def __init__(self, api_key: str, voice_name: str = "Kore"):
+        if voice_name not in self.AVAILABLE_VOICES:
+            raise ValueError(f"Invalid voice name: {voice_name}. Choose from {self.AVAILABLE_VOICES}")
+
+        self.api_key = api_key
+        self.api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-tts:generateContent"
+        self.voice_name = voice_name
+
+    async def generate_speech(self, text: str) -> AsyncGenerator[bytes, None]:
+        headers = {
+            "x-goog-api-key": self.api_key,
+            "Content-Type": "application/json"
+        }
+        json_data = {
+            "contents": [{
+                "parts": [{
+                    "text": text
+                }]
+            }],
+            "generationConfig": {
+                "responseModalities": ["AUDIO"],
+                "speechConfig": {
+                    "voiceConfig": {
+                        "prebuiltVoiceConfig": {
+                            "voiceName": self.voice_name
+                        }
+                    }
+                }
+            },
+            "model": "gemini-2.5-flash-preview-tts"
+        }
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(self.api_url, headers=headers, json=json_data) as response:
+                response.raise_for_status()
+                response_json = await response.json()
+
+                inline_data = response_json['candidates'][0]['content']['parts'][0]['inlineData']['data']
+                audio_bytes = base64.b64decode(inline_data)
+
+                yield audio_bytes
\ No newline at end of file
diff --git a/ai-hub/requirements.txt b/ai-hub/requirements.txt
index 4be17cd..276e3a2 100644
--- a/ai-hub/requirements.txt
+++ b/ai-hub/requirements.txt
@@ -14,4 +14,5 @@
 pytest-mock
 numpy
 faiss-cpu
-dspy
\ No newline at end of file
+dspy
+aioresponses
\ No newline at end of file
diff --git a/ai-hub/tests/core/providers/tts/test_gemini.py b/ai-hub/tests/core/providers/tts/test_gemini.py
new file mode 100644
index 0000000..02ca463
--- /dev/null
+++ b/ai-hub/tests/core/providers/tts/test_gemini.py
@@ -0,0 +1,67 @@
+import pytest
+import aiohttp
+import asyncio
+import base64
+from aioresponses import aioresponses
+from app.core.providers.tts.gemini import GeminiTTSProvider
+
+# Note: The mock_aioresponse fixture is not needed and has been removed.
+
+@pytest.mark.asyncio
+async def test_generate_speech_success():
+    """
+    Tests that generate_speech correctly makes an API call and processes the response.
+    """
+    api_key = "test_api_key"
+    text_to_speak = "Hello, world!"
+
+    # Create a dummy base64 encoded audio response
+    dummy_audio_bytes = b"This is a test audio stream."
+    dummy_base64_data = base64.b64encode(dummy_audio_bytes).decode('utf-8')
+
+    # The mocked JSON response from the API
+    mock_response_json = {
+        "candidates": [{
+            "content": {
+                "parts": [{
+                    "inlineData": {
+                        "data": dummy_base64_data
+                    }
+                }]
+            }
+        }]
+    }
+
+    # Configure aioresponses to intercept the API call and return our mock data
+    tts_provider = GeminiTTSProvider(api_key=api_key)
+    with aioresponses() as m:
+        m.post(
+            tts_provider.api_url,
+            status=200,
+            payload=mock_response_json,
+            repeat=True
+        )
+
+        # Call the method under test
+        audio_stream = tts_provider.generate_speech(text_to_speak)
+
+        # Iterate through the async generator to get the data
+        audio_chunks = [chunk async for chunk in audio_stream]
+
+        # Assert that the list of chunks is not empty and contains the expected data
+        assert len(audio_chunks) == 1
+        assert audio_chunks[0] == dummy_audio_bytes
+
+# The other tests for __init__ are not affected and can remain as they are.
+def test_init_with_valid_voice_name():
+    api_key = "test_api_key"
+    voice_name = "Zephyr"
+    tts_provider = GeminiTTSProvider(api_key=api_key, voice_name=voice_name)
+    assert tts_provider.api_key == api_key
+    assert tts_provider.voice_name == voice_name
+
+def test_init_with_invalid_voice_name():
+    api_key = "test_api_key"
+    invalid_voice_name = "InvalidVoice"
+    with pytest.raises(ValueError, match="Invalid voice name"):
+        GeminiTTSProvider(api_key=api_key, voice_name=invalid_voice_name)
\ No newline at end of file
diff --git a/ai-hub/tests/test_config.py b/ai-hub/tests/test_config.py
index abea3b0..5687698 100644
--- a/ai-hub/tests/test_config.py
+++ b/ai-hub/tests/test_config.py
@@ -1,7 +1,7 @@
 import pytest
 import importlib
 import yaml
-from app.config import EmbeddingProvider
+from app.config import EmbeddingProvider, TTSProvider, Settings
 
 
 @pytest.fixture
@@ -9,7 +9,7 @@
     """
     Creates a temporary config.yaml file and returns its path.
     Corrected the 'provider' value to be lowercase 'mock' to match the Enum.
-    Added database settings for testing.
+    Added database and TTS settings for testing.
     """
     config_content = {
         "application": {
@@ -20,12 +20,18 @@
         "embedding_provider": {
             # This value must be lowercase to match the Pydantic Enum member
             "provider": "mock",
-            "model_name": "embedding-model-from-yaml"
+            "model_name": "embedding-model-from-yaml",
+            "api_key": "embedding-api-from-yaml"
         },
         "database": {
             "mode": "sqlite",
             "local_path": "custom_folder/test_ai_hub.db",
             "url": "postgresql://user:pass@host/dbname"  # Should be ignored for sqlite mode
+        },
+        "tts_provider": {
+            "provider": "google_genai",
+            "voice_name": "Laomedeia",
+            "api_key": "tts-api-from-yaml"
         }
     }
     config_path = tmp_path / "test_config.yaml"
@@ -34,62 +40,73 @@
     return str(config_path)
 
 
-@pytest.fixture(autouse=True)
-def mock_api_keys(monkeypatch):
+@pytest.fixture
+def clear_all_env(monkeypatch):
     """
-    Automatically sets mock API keys for all tests to prevent the
-    ValueError from being raised in config.py.
+    A fixture to clear all relevant environment variables for test isolation.
     """
-    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_deepseek_key")
-    monkeypatch.setenv("GEMINI_API_KEY", "mock_gemini_key")
-    monkeypatch.setenv("EMBEDDING_API_KEY", "mock_embedding_key")
-
-
-def test_sqlite_db_url_from_yaml(monkeypatch, tmp_config_file):
-    """Tests DATABASE_URL is constructed correctly from YAML sqlite local_path."""
-    monkeypatch.setenv("CONFIG_PATH", tmp_config_file)
+    monkeypatch.delenv("CONFIG_PATH", raising=False)
+    monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False)
+    monkeypatch.delenv("GEMINI_API_KEY", raising=False)
+    monkeypatch.delenv("EMBEDDING_API_KEY", raising=False)
+    monkeypatch.delenv("TTS_PROVIDER", raising=False)
+    monkeypatch.delenv("TTS_VOICE_NAME", raising=False)
+    monkeypatch.delenv("TTS_API_KEY", raising=False)
     monkeypatch.delenv("DB_MODE", raising=False)
     monkeypatch.delenv("LOCAL_DB_PATH", raising=False)
     monkeypatch.delenv("DATABASE_URL", raising=False)
 
-    from app import config
-    importlib.reload(config)
+@pytest.fixture
+def mock_all_keys(monkeypatch):
+    """
+    A fixture to set all mock API keys.
+    """
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_deepseek_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_gemini_key")
+    monkeypatch.setenv("EMBEDDING_API_KEY", "mock_embedding_key")
+    monkeypatch.setenv("TTS_API_KEY", "mock_tts_key")
+
+
+def test_sqlite_db_url_from_yaml(monkeypatch, tmp_config_file, clear_all_env):
+    """Tests DATABASE_URL is constructed correctly from YAML sqlite local_path."""
+    monkeypatch.setenv("CONFIG_PATH", tmp_config_file)
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_key")
+
+    settings = Settings()
     expected_path = "sqlite:///./custom_folder/test_ai_hub.db"
-    assert config.settings.DB_MODE == "sqlite"
-    assert config.settings.DATABASE_URL == expected_path
+    assert settings.DB_MODE == "sqlite"
+    assert settings.DATABASE_URL == expected_path
 
 
-def test_sqlite_db_url_from_env_local_path(monkeypatch, tmp_path):
+def test_sqlite_db_url_from_env_local_path(monkeypatch, tmp_path, clear_all_env):
    """Tests that LOCAL_DB_PATH env var overrides YAML for sqlite DATABASE_URL."""
     monkeypatch.setenv("DB_MODE", "sqlite")
     monkeypatch.setenv("LOCAL_DB_PATH", "env_folder/env_ai_hub.db")
-    monkeypatch.delenv("CONFIG_PATH", raising=False)
-    monkeypatch.delenv("DATABASE_URL", raising=False)
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_key")
 
-    from app import config
-    importlib.reload(config)
-
+    settings = Settings()
     expected_path = "sqlite:///./env_folder/env_ai_hub.db"
-    assert config.settings.DB_MODE == "sqlite"
-    assert config.settings.DATABASE_URL == expected_path
+    assert settings.DB_MODE == "sqlite"
+    assert settings.DATABASE_URL == expected_path
 
 
-def test_external_db_url_used_when_not_sqlite(monkeypatch, tmp_config_file):
+def test_external_db_url_used_when_not_sqlite(monkeypatch, tmp_config_file, clear_all_env):
     """Tests DATABASE_URL uses external URL when DB_MODE is not sqlite."""
     monkeypatch.setenv("CONFIG_PATH", tmp_config_file)
     monkeypatch.setenv("DB_MODE", "postgresql")
     monkeypatch.setenv("DATABASE_URL", "postgresql://env_user:env_pass@env_host/env_db")
-    monkeypatch.delenv("LOCAL_DB_PATH", raising=False)
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_key")
 
-    from app import config
-    importlib.reload(config)
-
-    assert config.settings.DB_MODE == "postgresql"
-    assert config.settings.DATABASE_URL == "postgresql://env_user:env_pass@env_host/env_db"
+    settings = Settings()
+    assert settings.DB_MODE == "postgresql"
+    assert settings.DATABASE_URL == "postgresql://env_user:env_pass@env_host/env_db"
 
 
-def test_external_db_url_from_yaml_when_not_sqlite(monkeypatch, tmp_path):
+def test_external_db_url_from_yaml_when_not_sqlite(monkeypatch, tmp_path, clear_all_env):
     """Tests DATABASE_URL uses YAML url when DB_MODE != sqlite and no env DATABASE_URL."""
     # Write YAML with postgresql mode and url
     config_content = {
@@ -97,32 +114,68 @@
             "mode": "postgresql",
             "url": "postgresql://yaml_user:yaml_pass@yaml_host/yaml_db",
             "local_path": "ignored_path_for_postgresql.db"
-        }
+        },
+        "llm_providers": {"deepseek_model_name": "deepseek-from-yaml"}
     }
     config_path = tmp_path / "test_config_pg.yaml"
     with open(config_path, 'w') as f:
         yaml.dump(config_content, f)
 
     monkeypatch.setenv("CONFIG_PATH", str(config_path))
-    monkeypatch.delenv("DATABASE_URL", raising=False)
-    monkeypatch.delenv("DB_MODE", raising=False)
-    monkeypatch.delenv("LOCAL_DB_PATH", raising=False)
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_key")
 
-    from app import config
-    importlib.reload(config)
-
-    assert config.settings.DB_MODE == "postgresql"
-    assert config.settings.DATABASE_URL == "postgresql://yaml_user:yaml_pass@yaml_host/yaml_db"
+    settings = Settings()
+    assert settings.DB_MODE == "postgresql"
+    assert settings.DATABASE_URL == "postgresql://yaml_user:yaml_pass@yaml_host/yaml_db"
 
 
-def test_sqlite_db_url_defaults(monkeypatch):
+def test_sqlite_db_url_defaults(monkeypatch, clear_all_env):
     """Tests DATABASE_URL defaults to sqlite path if no env or YAML."""
     monkeypatch.setenv("DB_MODE", "sqlite")
-    monkeypatch.delenv("LOCAL_DB_PATH", raising=False)
-    monkeypatch.delenv("DATABASE_URL", raising=False)
-    monkeypatch.delenv("CONFIG_PATH", raising=False)
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_key")
+    settings = Settings()
 
-    from app import config
-    importlib.reload(config)
+    assert settings.DB_MODE == "sqlite"
+    assert settings.DATABASE_URL == "sqlite:///./data/ai_hub.db"
 
-    assert config.settings.DB_MODE == "sqlite"
-    assert config.settings.DATABASE_URL == "sqlite:///./data/ai_hub.db"
+
+# --- Tests for TTS Configuration ---
+
+def test_tts_settings_from_yaml(monkeypatch, tmp_config_file, clear_all_env):
+    """Tests that TTS settings are loaded correctly from a YAML file."""
+    monkeypatch.setenv("CONFIG_PATH", tmp_config_file)
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_key")
+
+    settings = Settings()
+    assert settings.TTS_PROVIDER == TTSProvider.GOOGLE_GENAI
+    assert settings.TTS_VOICE_NAME == "Laomedeia"
+    assert settings.TTS_API_KEY == "tts-api-from-yaml"
+
+
+def test_tts_settings_from_env(monkeypatch, tmp_config_file, clear_all_env):
+    """Tests that TTS environment variables override the YAML file."""
+    monkeypatch.setenv("CONFIG_PATH", tmp_config_file)
+    # Explicitly set all TTS env vars for this test
+    monkeypatch.setenv("TTS_PROVIDER", "google_genai")
+    monkeypatch.setenv("TTS_VOICE_NAME", "Zephyr")
+    monkeypatch.setenv("TTS_API_KEY", "env_tts_key")
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_key")
+
+    settings = Settings()
+    assert settings.TTS_PROVIDER == TTSProvider.GOOGLE_GENAI
+    assert settings.TTS_VOICE_NAME == "Zephyr"
+    assert settings.TTS_API_KEY == "env_tts_key"
+
+
+def test_tts_settings_defaults(monkeypatch, clear_all_env):
+    """Tests that TTS settings fall back to Pydantic defaults if no env or YAML are present."""
+    monkeypatch.setenv("DEEPSEEK_API_KEY", "mock_key")
+    monkeypatch.setenv("GEMINI_API_KEY", "mock_key")
+    settings = Settings()
+
+    assert settings.TTS_PROVIDER == TTSProvider.GOOGLE_GENAI
+    assert settings.TTS_VOICE_NAME == "Kore"
+    assert settings.TTS_API_KEY is None
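
Usage sketch (not part of the diff above): a minimal, hedged example of how the new settings and factory could be wired together. It assumes TTS_API_KEY is configured, that get_tts_provider matches the configured provider value as in the corrected factory, and the entrypoint name synthesize_demo is purely illustrative.

import asyncio

from app.config import settings
from app.core.providers.factory import get_tts_provider


async def synthesize_demo() -> bytes:
    # settings.TTS_PROVIDER is a str-based enum whose value ("google_genai")
    # is what the factory matches on; TTS_API_KEY is assumed to be set.
    tts = get_tts_provider(settings.TTS_PROVIDER.value, settings.TTS_API_KEY)

    # generate_speech is an async generator yielding raw decoded audio bytes.
    chunks = [chunk async for chunk in tts.generate_speech("Hello from Cortex Hub")]
    return b"".join(chunks)


if __name__ == "__main__":
    audio = asyncio.run(synthesize_demo())
    print(f"Received {len(audio)} bytes of TTS audio")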