# Reconstructed post-patch hunk of ai-hub/app/api/routes/tts.py (around line 60).
all_text = request.text
# NOTE(review): this list looks superseded by the per-priority lists inside
# split_text below — confirm nothing later in this handler still reads it
# before removing.
separators = ['.', '?', '!', '\n', '。', '?', '!', ',', ';']

# Adaptive Multi-Splitter:
# 1. Tiny first chunk for instant response (low time-to-first-audio).
# 2. Larger later chunks to build a safety buffer for the stream.
def split_text(text):
    """Split *text* into TTS-friendly chunks.

    The first chunk is capped at 80 characters so synthesis can start
    quickly; every later chunk is capped at 300 characters to keep the
    audio stream buffered. The split point for an over-cap remainder is
    chosen by priority:

    1. sentence-ending punctuation ('.', '?', '!', newline, CJK forms)
    2. clause punctuation (commas, semicolons, colon, CJK forms)
    3. a plain space
    4. a hard cut exactly at the cap (e.g. one unbroken token longer
       than the cap)

    Separator scans never consider indices <= 40, so no separator-based
    chunk is shorter than 42 characters. Each chunk is stripped of
    surrounding whitespace and empty chunks are dropped.

    Returns a list of non-empty strings; [] for blank input.
    """
    primary_seps = ['.', '?', '!', '\n', '。', '?', '!']
    secondary_seps = [',', ';', ',', ';', ':']

    def find_split(segment, limit, wanted):
        # Rightmost index i in (40, min(len(segment)-1, limit)] whose
        # character is in `wanted`; returns i + 1 so the separator stays
        # attached to the left chunk, or -1 when none is found.
        for i in range(min(len(segment) - 1, limit), 40, -1):
            if segment[i] in wanted:
                return i + 1
        return -1

    chunks = []
    is_first = True
    current_text = text.strip()

    while current_text:
        # Target size: 80 for the first chunk (speed), 300 after (buffer).
        target_size = 80 if is_first else 300
        if len(current_text) <= target_size:
            chunks.append(current_text)
            break

        # Try the separator classes in priority order; the first class
        # that yields a split wins (one helper replaces three copy-pasted
        # scan loops in the previous revision).
        split_at = -1
        for seps in (primary_seps, secondary_seps, [' ']):
            split_at = find_split(current_text, target_size, seps)
            if split_at != -1:
                break

        if split_at == -1:
            # Hard cutoff fallback: no separator above index 40.
            split_at = target_size

        chunks.append(current_text[:split_at].strip())
        current_text = current_text[split_at:].strip()
        is_first = False

    return [c for c in chunks if c]

chunks = split_text(all_text)