diff --git a/ai-hub/app/core/providers/tts/gemini.py b/ai-hub/app/core/providers/tts/gemini.py index fba4b49..f87fa37 100644 --- a/ai-hub/app/core/providers/tts/gemini.py +++ b/ai-hub/app/core/providers/tts/gemini.py @@ -84,9 +84,15 @@ headers = {"Content-Type": "application/json"} - # The dedicated TTS models require a system instruction to produce only audio + # Standardized payload for both dedicated TTS and multimodal models json_data = { - "contents": [{"role": "user", "parts": [{"text": text}]}], + "contents": [ + { + "parts": [ + {"text": f"Convert the following text to speech. Output ONLY audio data: {text}"} + ] + } + ], "generationConfig": { "responseModalities": ["AUDIO"], "speechConfig": {