import os
import litellm
import logging
import io
from typing import Optional
from fastapi import HTTPException
from app.core.providers.base import STTProvider
# Module-level logger for this provider
logger = logging.getLogger(__name__)
class GeneralSTTProvider(STTProvider):
    """Concrete General STT provider using litellm for Whisper transcription.

    The provider stores an API key and a model name and forwards audio bytes
    to ``litellm.transcription``; failures are reported to callers as
    ``fastapi.HTTPException``.
    """

    def __init__(
        self,
        api_key: str,
        model_name: str = ""
    ):
        """Store the credentials and model used for transcription.

        Args:
            api_key: API key passed to ``litellm.transcription`` on each call.
            model_name: Model identifier passed to ``litellm.transcription``.
                The default is an empty string, which is rejected below, so a
                value must effectively always be supplied.

        Raises:
            ValueError: If ``api_key`` or ``model_name`` is empty/falsy.
        """
        if not api_key:
            raise ValueError("API_KEY for general STT provider not set or provided.")
        if not model_name:
            raise ValueError("model_name for general STT provider not set or provided")
        self.api_key = api_key
        self.model_name = model_name
        logger.debug(f"Initialized GeneralSTTProvider with model: {self.model_name}")

    async def transcribe_audio(self, audio_data: bytes) -> str:
        """Transcribe audio using the litellm transcription endpoint.

        Args:
            audio_data: Raw audio bytes. They are wrapped in an in-memory file
                named ``input.wav`` before being handed to litellm.

        Returns:
            The transcribed text, or an empty string when the response carries
            no text.

        Raises:
            HTTPException: 400 when ``audio_data`` is empty; 401 on a litellm
                authentication error; the upstream status code (falling back
                to 500) on a litellm API error; 500 on any other failure.
        """
        # Local import: asyncio is not part of this module's import block.
        import asyncio

        logger.debug("Starting transcription process using litellm.transcription().")

        # Fail fast instead of spending a remote call on an empty payload.
        if not audio_data:
            logger.warning("Transcription requested with empty audio data.")
            raise HTTPException(
                status_code=400,
                detail="No audio data provided for transcription.",
            )

        try:
            # Wrap audio bytes in a BytesIO object to mimic a file
            audio_file = io.BytesIO(audio_data)
            audio_file.name = "input.wav"  # Required by some clients (like Whisper)

            # litellm.transcription is synchronous, so run it in the default
            # thread executor. get_running_loop() is the supported way to
            # obtain the loop from inside a coroutine.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: litellm.transcription(
                    model=self.model_name,
                    file=audio_file,
                    api_key=self.api_key,
                ),
            )

            # Extract text. Prefer the mapping-style accessor the response is
            # expected to offer, fall back to attribute access, and normalise
            # a missing/None value to "" so the slice below cannot fail.
            getter = getattr(response, "get", None)
            if callable(getter):
                text = getter("text", "")
            else:
                text = getattr(response, "text", "")
            transcript = text or ""

            logger.debug(f"Transcription succeeded. Text: '{transcript[:50]}...'")
            return transcript
        except litellm.exceptions.AuthenticationError as e:
            # Not every exception object is guaranteed to expose .message;
            # fall back to str(e) so the handler itself cannot raise.
            message = getattr(e, "message", None) or str(e)
            logger.error(f"LiteLLM authentication error: {message}")
            raise HTTPException(
                status_code=401,
                detail="Authentication failed: Invalid API key.",
            ) from e
        except litellm.exceptions.APIError as e:
            logger.error(f"LiteLLM API error occurred: {e}")
            # The attribute may be absent, None, or a non-integer; HTTPException
            # needs a real integer status, so default to 500 in those cases.
            status_code = getattr(e, "status_code", None)
            if not isinstance(status_code, int):
                status_code = 500
            message = getattr(e, "message", None) or str(e)
            raise HTTPException(
                status_code=status_code,
                detail=f"API request failed: {message}",
            ) from e
        except Exception as e:
            logger.error(f"Unexpected error during transcription: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail="Failed to transcribe audio due to an unexpected error.",
            ) from e