cortex-hub/ai-hub/app/core/providers/tts/general.py
import litellm
from typing import AsyncGenerator
from app.core.providers.base import TTSProvider

class GeneralTTSProvider(TTSProvider):
    """General Text-to-Speech provider using LiteLLM."""
    def __init__(self, model_name: str, api_key: str, voice_name: str = "alloy", **kwargs):
        self.model_name = model_name
        self.api_key = api_key
        self.voice_name = voice_name
        self.kwargs = kwargs

    async def generate_speech(self, text: str) -> AsyncGenerator[bytes, None]:
        """Generates speech using LiteLLM aspeech."""
        try:
            # Note: litellm.aspeech returns a binary response object; whether it can be
            # streamed depends on the underlying provider (OpenAI supports streaming).
            response = await litellm.aspeech(
                model=self.model_name,
                input=text,
                voice=self.voice_name,
                api_key=self.api_key,
                drop_params=True,
                **self.kwargs
            )
            
            # LiteLLM typically returns an httpx-backed binary response object
            # (exposing byte iterators and .content) or, for some providers, raw bytes.
            if isinstance(response, bytes):
                yield response
            elif hasattr(response, "aiter_bytes"):
                # Async byte iterator: stream chunks as they arrive.
                async for chunk in response.aiter_bytes():
                    yield chunk
            elif hasattr(response, "iter_bytes"):
                # Sync byte iterator: iterate with a plain for-loop.
                for chunk in response.iter_bytes():
                    yield chunk
            elif hasattr(response, "content"):
                # No iterator available; yield the full audio payload at once.
                yield response.content
            else:
                raise TypeError(f"Unexpected TTS response type: {type(response)!r}")
                
        except Exception as e:
            raise RuntimeError(
                f"Failed to generate speech with LiteLLM for model '{self.model_name}': {e}"
            ) from e
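

# --- Usage sketch (not part of the provider) ---------------------------------
# A minimal example of consuming the async generator above. The model name,
# API key, and output filename are placeholders/assumptions, not values the
# provider requires; any LiteLLM-supported TTS model should work.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        provider = GeneralTTSProvider(
            model_name="openai/tts-1",  # placeholder model name
            api_key="sk-...",           # placeholder credential
        )
        # Collect the yielded audio chunks and write them to a single file.
        audio = bytearray()
        async for chunk in provider.generate_speech("Hello from GeneralTTSProvider."):
            audio.extend(chunk)
        with open("speech.mp3", "wb") as f:
            f.write(bytes(audio))

    asyncio.run(_demo())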