diff --git a/ai-hub/app/core/providers/tts/gemini.py b/ai-hub/app/core/providers/tts/gemini.py
index fba4b49..f87fa37 100644
--- a/ai-hub/app/core/providers/tts/gemini.py
+++ b/ai-hub/app/core/providers/tts/gemini.py
@@ -84,9 +84,15 @@
 
         headers = {"Content-Type": "application/json"}
 
-        # The dedicated TTS models require a system instruction to produce only audio
+        # One payload for dedicated TTS and multimodal models; the prompt prefix requests audio-only output
         json_data = {
-            "contents": [{"role": "user", "parts": [{"text": text}]}],
+            "contents": [
+                {
+                    "parts": [
+                        {"text": f"Convert the following text to speech. Output ONLY audio data: {text}"}
+                    ]
+                }
+            ],
             "generationConfig": {
                 "responseModalities": ["AUDIO"],
                 "speechConfig": {