#!/usr/bin/env bash
#
# Transcribe an audio file with the Gemini API.
#
# Usage:
#   <this-script> <audio_file>
#
# Environment:
#   GEMINI_API_KEY  API key sent in the x-goog-api-key header (required).
#
# Requires: curl, jq, file.
#
# Side effects:
#   Writes the upload response to data/file_info.json and the generateContent
#   request body to data/request.json (relative to the current directory).
#   The data/ directory is created if it does not exist.
#
# Output:
#   The text of the first part of the first candidate, on stdout.
#   Diagnostics go to stderr; the script exits non-zero on any failure.
set -euo pipefail

readonly API_BASE="https://generativelanguage.googleapis.com"
readonly MODEL="gemini-2.5-flash"
readonly DATA_DIR="data"
readonly FILE_INFO_JSON="${DATA_DIR}/file_info.json"
readonly REQUEST_JSON="${DATA_DIR}/request.json"

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ $# -lt 1 ]]; then
  die "Usage: $0 <audio_file>"
fi

AUDIO_PATH="$1"
DISPLAY_NAME="AUDIO"

[[ -f "$AUDIO_PATH" ]] || die "Error: File not found: $AUDIO_PATH"
[[ -n "${GEMINI_API_KEY:-}" ]] \
  || die "Error: GEMINI_API_KEY environment variable not set."

for tool in curl jq file; do
  command -v "$tool" >/dev/null || die "Error: required tool not found: $tool"
done

MIME_TYPE=$(file -b --mime-type -- "$AUDIO_PATH")
# Some wc implementations pad the count with whitespace; strip it so the
# value is a clean integer when used in HTTP headers.
NUM_BYTES=$(wc -c <"$AUDIO_PATH" | tr -d '[:space:]')

TMP_HEADER_FILE=$(mktemp) || die "Error: mktemp failed"
# Remove the temp header file on every exit path, not just on success.
trap 'rm -f -- "$TMP_HEADER_FILE"' EXIT

# All intermediate JSON lives in DATA_DIR; writes, cleanup and reads must
# agree on that location.
mkdir -p -- "$DATA_DIR"
# Remove any old JSON files
rm -f -- "$FILE_INFO_JSON" "$REQUEST_JSON"

# Step 1: Start resumable upload
# The upload URL is returned in a response header, so headers are dumped to
# a temp file and the body is discarded.
START_BODY=$(jq -cn --arg name "$DISPLAY_NAME" '{file: {display_name: $name}}')
curl -sS "${API_BASE}/upload/v1beta/files" \
  -H "x-goog-api-key: $GEMINI_API_KEY" \
  -D "$TMP_HEADER_FILE" \
  -H "X-Goog-Upload-Protocol: resumable" \
  -H "X-Goog-Upload-Command: start" \
  -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
  -H "Content-Type: application/json" \
  -d "$START_BODY" >/dev/null \
  || die "Error: failed to start resumable upload."

# awk exits 0 even when nothing matches, so a missing header reaches the
# explicit check below instead of aborting silently under pipefail.
UPLOAD_URL=$(
  awk 'tolower($1) == "x-goog-upload-url:" { print $2; exit }' \
    "$TMP_HEADER_FILE" \
    | tr -d '\r'
)
[[ -n "$UPLOAD_URL" ]] \
  || die "Error: no x-goog-upload-url header in upload start response."

# Step 2: Upload the file
curl -sS "$UPLOAD_URL" \
  -H "Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Offset: 0" \
  -H "X-Goog-Upload-Command: upload, finalize" \
  --data-binary "@${AUDIO_PATH}" >"$FILE_INFO_JSON" \
  || die "Error: file upload failed."

# jq -e fails on null/missing, so a bad upload response is caught here.
FILE_NAME=$(jq -er '.file.name' "$FILE_INFO_JSON") \
  || die "Error: upload response has no .file.name (see ${FILE_INFO_JSON})."
# .file.name looks like "files/<id>"; keep only the id component.
FILE_URI="${API_BASE}/v1beta/files/${FILE_NAME##*/}"

# Step 3: Prepare request
# Built with jq so that values are JSON-escaped.
jq -n \
  --arg mime "$MIME_TYPE" \
  --arg uri "$FILE_URI" \
  '{
    contents: [
      {
        parts: [
          {fileData: {mimeType: $mime, fileUri: $uri}},
          {text: "Transcribe this audio file."}
        ]
      }
    ]
  }' >"$REQUEST_JSON"

# Step 4: Send transcription request
RESPONSE=$(
  curl -sS "${API_BASE}/v1beta/models/${MODEL}:generateContent" \
    -H "x-goog-api-key: $GEMINI_API_KEY" \
    -H "Content-Type: application/json" \
    -X POST \
    --data-binary "@${REQUEST_JSON}"
) || die "Error: transcription request failed."

# Step 5: Output transcript text
if ! TRANSCRIPT=$(jq -er '.candidates[0].content.parts[0].text' <<<"$RESPONSE"); then
  printf 'Error: no transcript text in response:\n%s\n' "$RESPONSE" >&2
  exit 1
fi
printf '%s\n' "$TRANSCRIPT"