import litellm
import asyncio
from typing import AsyncGenerator
from app.core.providers.base import TTSProvider
class GeneralTTSProvider(TTSProvider):
    """General Text-to-Speech provider backed by LiteLLM's ``aspeech``.

    Wraps ``litellm.aspeech`` and normalises the several response shapes
    LiteLLM may return (raw ``bytes``, an object exposing ``.content``, or
    an httpx-style streaming response) into a uniform async byte stream.
    """

    def __init__(self, model_name: str, api_key: str, voice_name: str = "alloy", **kwargs):
        """Store the model/credential configuration.

        Args:
            model_name: LiteLLM model identifier (e.g. ``"openai/tts-1"``).
            api_key: API key forwarded to the underlying provider.
            voice_name: Voice preset; defaults to ``"alloy"``.
            **kwargs: Extra parameters passed verbatim to ``litellm.aspeech``.
        """
        self.model_name = model_name
        self.api_key = api_key
        self.voice_name = voice_name
        self.kwargs = kwargs

    async def generate_speech(self, text: str) -> AsyncGenerator[bytes, None]:
        """Generate speech for ``text``, yielding raw audio bytes.

        Args:
            text: The text to synthesise.

        Yields:
            Chunks of encoded audio (a single chunk for non-streaming
            providers).

        Raises:
            RuntimeError: If the LiteLLM call or the response stream fails.
                The original exception is preserved as ``__cause__``.
        """
        try:
            response = await litellm.aspeech(
                model=self.model_name,
                input=text,
                voice=self.voice_name,
                api_key=self.api_key,
                # Silently drop parameters the target provider does not accept.
                drop_params=True,
                **self.kwargs,
            )
            # Normalise the possible response shapes, most common first.
            if hasattr(response, "content"):
                # e.g. OpenAI's HttpxBinaryResponseContent: whole payload at once.
                yield response.content
            elif hasattr(response, "aiter_bytes"):
                # httpx *async* streaming response.
                async for chunk in response.aiter_bytes():
                    yield chunk
            elif hasattr(response, "iter_bytes"):
                # BUGFIX: httpx sync responses expose iter_bytes() as a plain
                # generator — ``async for`` over it raises TypeError, so a
                # synchronous loop is required here.
                for chunk in response.iter_bytes():
                    yield chunk
            elif isinstance(response, bytes):
                yield response
            else:
                # Unknown response shape with no retrievable payload; the old
                # getattr fallback could only ever produce b"" here (the
                # ``content`` attribute was already ruled out above).
                yield b""
        except Exception as e:
            # BUGFIX: chain with ``from e`` so the root cause (auth error,
            # network failure, bad voice name, ...) survives in the traceback.
            raise RuntimeError(
                f"Failed to generate speech with LiteLLM for model '{self.model_name}': {e}"
            ) from e