#!/usr/bin/env python3 """ 豆包语音转换工具 支持:文字转语音 (TTS) """ import os import sys import json import base64 import requests from pathlib import Path class DoubaoVoiceConverter: """豆包语音转换工具类""" def __init__(self): # 从环境变量读取配置 self.app_id = os.environ.get("DOUBAO_APP_ID") self.access_token = os.environ.get("DOUBAO_ACCESS_TOKEN") if not self.app_id or not self.access_token: raise ValueError( "请先设置环境变量:\n" "export DOUBAO_APP_ID='your_app_id'\n" "export DOUBAO_ACCESS_TOKEN='your_access_token'" ) # API版本选择: V1 (默认, 支持基础音色) 或 V3 (豆包2.0, 需额外配置) self.use_v3 = os.environ.get("DOUBAO_USE_V3", "false").lower() == "true" if self.use_v3: self.tts_url = "https://openspeech.bytedance.com/api/v3/tts/unidirectional" self.resource_id = os.environ.get("DOUBAO_RESOURCE_ID", "volc.bigmodel.tts") else: # V1 API - 稳定可用,支持基础音色 self.tts_url = "https://openspeech.bytedance.com/api/v1/tts" def text_to_speech( self, text: str, output_file: str = "output.mp3", voice_type: str = "BV700_V2_streaming" ) -> str: """ 文字转语音 (TTS) Args: text: 要转换的文字 output_file: 输出音频文件路径 voice_type: 音色类型 - BV700_V2_streaming: 通用女声(推荐) - BV701_V2_streaming: 通用男声 - BV406_streaming: 温柔女声 - BV158_streaming: 活泼女声 - BV115_streaming: 磁性男声 Returns: str: 输出文件路径 """ print(f"📝 文字转语音中...") print(f" 文字: {text[:50]}{'...' if len(text) > 50 else ''}") print(f" 音色: {voice_type}") headers = { "Authorization": f"Bearer;{self.access_token}", "Content-Type": "application/json" } # V3 API需要Resource-Id (如果启用) if self.use_v3: headers["Resource-Id"] = self.resource_id payload = { "app": { "appid": self.app_id, "token": self.access_token, "cluster": "volcano_tts" }, "user": { "uid": "user_001" }, "audio": { "voice_type": voice_type, "encoding": "mp3", "speed_ratio": 1.0, "volume_ratio": 1.0, "pitch_ratio": 1.0 }, "request": { "reqid": f"tts_{os.urandom(8).hex()}", "text": text, "text_type": "plain", "operation": "query" } } try: response = requests.post(self.tts_url, headers=headers, json=payload, timeout=30) # 打印响应头信息 print(f"\n📋 响应信息:") print(f" HTTP状态码: {response.status_code}") if 'X-Tt-Logid' in response.headers: print(f" RequestId: {response.headers['X-Tt-Logid']}") if 'X-Request-Id' in response.headers: print(f" X-Request-Id: {response.headers['X-Request-Id']}") data = response.json() # 打印完整响应 print(f"\n📄 完整响应:") print(json.dumps(data, indent=2, ensure_ascii=False)) print() if data.get("code") == 3000: # 成功:解码并保存音频 audio_data = base64.b64decode(data["data"]) with open(output_file, "wb") as f: f.write(audio_data) file_size = len(audio_data) / 1024 # KB print(f"✅ 语音合成成功!") print(f" 输出: {output_file} ({file_size:.1f} KB)") return output_file else: error_msg = data.get("message", "未知错误") reqid = data.get("reqid", "未知") raise Exception(f"TTS 失败\n 错误码: {data.get('code')}\n 错误信息: {error_msg}\n RequestId: {reqid}") except requests.exceptions.Timeout: raise Exception("请求超时,请检查网络连接") except Exception as e: raise Exception(f"TTS 调用失败: {str(e)}") def main(): """命令行工具""" import argparse parser = argparse.ArgumentParser(description="豆包语音转换工具") subparsers = parser.add_subparsers(dest="command", help="选择功能") # TTS 命令 tts_parser = subparsers.add_parser("tts", help="文字转语音") tts_parser.add_argument("text", help="要转换的文字") tts_parser.add_argument("-o", "--output", default="output.mp3", help="输出音频文件(默认: output.mp3)") tts_parser.add_argument("-v", "--voice", default="BV700_V2_streaming", help="音色类型(默认: BV700_V2_streaming 通用女声)") args = parser.parse_args() if not args.command: parser.print_help() return try: converter = DoubaoVoiceConverter() if args.command == "tts": converter.text_to_speech(args.text, args.output, args.voice) except Exception as e: print(f"❌ 错误: {e}", file=sys.stderr) sys.exit(1) if __name__ == "__main__": main()