cortex-hub/ai-hub/app/core/providers/stt/general.py at master

Fork: 0
yangyangxie / cortex-hub
Find file
Newer
Older
cortex-hub / ai-hub / app / core / providers / stt / general.py
Antigravity AI 5 hours ago 2 KB fix: remove forced sessionId reset in websocket service
Raw Blame History
import os
import litellm
import logging
import io
from typing import Optional
from fastapi import HTTPException
from app.core.providers.base import STTProvider

# Module-level logger named after this module's import path. Only the logger
# object is obtained here; no handlers or levels are set in this file.
logger = logging.getLogger(__name__)

class GeneralSTTProvider(STTProvider):
    """Speech-to-text provider that delegates transcription to ``litellm``.

    The configured model identifier and API key are passed unchanged to
    ``litellm.transcription`` on every request.
    """

    def __init__(
        self,
        api_key: str,
        model_name: str = "",
    ):
        """Store the credentials and model used for transcription requests.

        Args:
            api_key: API key forwarded to ``litellm.transcription``.
            model_name: Model identifier forwarded to ``litellm.transcription``.
                The default is an empty string, which is rejected below, so
                callers must effectively always supply a value.

        Raises:
            ValueError: If ``api_key`` or ``model_name`` is empty/falsy.
        """
        if not api_key:
            raise ValueError("API_KEY for general STT provider not set or provided.")
        if not model_name:
            raise ValueError("model_name for general STT provider not set or provided")
        self.api_key = api_key
        self.model_name = model_name

        logger.debug("Initialized GeneralSTTProvider with model: %s", self.model_name)

    async def transcribe_audio(self, audio_data: bytes) -> str:
        """Transcribe raw audio bytes via ``litellm.transcription``.

        Args:
            audio_data: Encoded audio content. It is wrapped in an in-memory
                file that is always named ``input.wav``, regardless of the
                actual encoding of the bytes.

        Returns:
            The ``text`` field of the transcription response, or an empty
            string when that field is missing or null.

        Raises:
            HTTPException: 401 when litellm reports an authentication error;
                the upstream status code (falling back to 500 when it is
                absent or not a 4xx/5xx integer) for litellm API errors;
                500 for any other failure.
        """
        # Kept as a function-scope import, as in the original implementation.
        import asyncio

        logger.debug("Starting transcription process using litellm.transcription().")

        try:
            # Wrap audio bytes in a BytesIO object to mimic a file
            audio_file = io.BytesIO(audio_data)
            audio_file.name = "input.wav"  # Required by some clients (like Whisper)

            # litellm.transcription is a blocking call, so run it on the
            # loop's default executor to keep the event loop responsive.
            # get_running_loop() is the supported way to obtain the loop
            # from inside a coroutine.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: litellm.transcription(
                    model=self.model_name,
                    file=audio_file,
                    api_key=self.api_key,
                ),
            )

            # A response may carry "text" explicitly set to None; normalise
            # that to "" so the slice below and the declared return type hold.
            transcript = response.get("text", "") or ""
            logger.debug("Transcription succeeded. Text: '%s...'", transcript[:50])
            return transcript

        except litellm.exceptions.AuthenticationError as e:
            # Fall back to the exception itself if it has no ``message``
            # attribute, so the handler cannot fail with AttributeError.
            logger.error("LiteLLM authentication error: %s", getattr(e, "message", e))
            raise HTTPException(
                status_code=401,
                detail="Authentication failed: Invalid API key.",
            ) from e
        except litellm.exceptions.APIError as e:
            logger.error("LiteLLM API error occurred: %s", e)
            # The attribute can exist but be None or a non-error value;
            # only propagate genuine 4xx/5xx integers.
            status_code = getattr(e, "status_code", None)
            if not isinstance(status_code, int) or not 400 <= status_code <= 599:
                status_code = 500
            raise HTTPException(
                status_code=status_code,
                detail=f"API request failed: {getattr(e, 'message', e)}",
            ) from e
        except Exception as e:
            logger.error("Unexpected error during transcription: %s", e, exc_info=True)
            raise HTTPException(
                status_code=500,
                detail="Failed to transcribe audio due to an unexpected error.",
            ) from e