move claude-marketplace to ai-proj-helper
This commit is contained in:
171
plugins/doubao-voice-plugin/scripts/voice_converter.py
Executable file
171
plugins/doubao-voice-plugin/scripts/voice_converter.py
Executable file
@@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
豆包语音转换工具
|
||||
支持:文字转语音 (TTS)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import base64
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class DoubaoVoiceConverter:
    """Client for the Doubao text-to-speech (TTS) HTTP API.

    Configuration is read from environment variables when an instance is
    created:

    * ``DOUBAO_APP_ID`` and ``DOUBAO_ACCESS_TOKEN`` -- required credentials.
    * ``DOUBAO_USE_V3`` -- set to ``"true"`` (case-insensitive) to post to
      the V3 endpoint instead of the default V1 endpoint.
    * ``DOUBAO_RESOURCE_ID`` -- value of the ``Resource-Id`` header sent with
      V3 requests; defaults to ``"volc.bigmodel.tts"``.
    """

    def __init__(self):
        """Load credentials and select the API endpoint from the environment.

        Raises:
            ValueError: If ``DOUBAO_APP_ID`` or ``DOUBAO_ACCESS_TOKEN`` is
                unset or empty.
        """
        self.app_id = os.environ.get("DOUBAO_APP_ID")
        self.access_token = os.environ.get("DOUBAO_ACCESS_TOKEN")

        if not self.app_id or not self.access_token:
            raise ValueError(
                "请先设置环境变量:\n"
                "export DOUBAO_APP_ID='your_app_id'\n"
                "export DOUBAO_ACCESS_TOKEN='your_access_token'"
            )

        # API version switch: V1 (default, supports the basic voices) or
        # V3 (Doubao 2.0, requires extra configuration).
        self.use_v3 = os.environ.get("DOUBAO_USE_V3", "false").lower() == "true"

        if self.use_v3:
            self.tts_url = "https://openspeech.bytedance.com/api/v3/tts/unidirectional"
            self.resource_id = os.environ.get("DOUBAO_RESOURCE_ID", "volc.bigmodel.tts")
        else:
            # V1 API.
            self.tts_url = "https://openspeech.bytedance.com/api/v1/tts"
            # Only V3 requests send a Resource-Id header; the attribute is
            # still defined here so it exists on every instance.
            self.resource_id = None

    @staticmethod
    def _redact_response(data):
        """Return a printable copy of a parsed API response.

        When ``data["data"]`` is a non-empty string (the base64 audio that
        ``text_to_speech`` decodes on success) it is replaced by a short
        placeholder so that logging the response does not dump the whole
        audio file to the terminal. The input object is never modified;
        anything that is not a dict, or has no such field, is returned as is.
        """
        if not isinstance(data, dict):
            return data
        audio_b64 = data.get("data")
        if not isinstance(audio_b64, str) or not audio_b64:
            return data
        printable = dict(data)
        printable["data"] = f"<base64 音频数据已省略, 共 {len(audio_b64)} 字符>"
        return printable

    def text_to_speech(
        self,
        text: str,
        output_file: str = "output.mp3",
        voice_type: str = "BV700_V2_streaming"
    ) -> str:
        """Synthesize *text* to speech and write the audio to *output_file*.

        Progress, response metadata and the (redacted) response body are
        printed to stdout.

        Args:
            text: The text to convert.
            output_file: Path of the audio file to write. Missing parent
                directories are created.
            voice_type: Voice identifier, for example:
                - BV700_V2_streaming: general female voice (recommended)
                - BV701_V2_streaming: general male voice
                - BV406_streaming: gentle female voice
                - BV158_streaming: lively female voice
                - BV115_streaming: magnetic male voice

        Returns:
            str: The output file path (``output_file`` unchanged).

        Raises:
            Exception: If the request times out, the API answers with a
                ``code`` other than 3000, or any other error occurs while
                calling the API or saving the audio. The original error is
                attached as ``__cause__``.
        """
        print("📝 文字转语音中...")
        print(f" 文字: {text[:50]}{'...' if len(text) > 50 else ''}")
        print(f" 音色: {voice_type}")

        headers = {
            "Authorization": f"Bearer;{self.access_token}",
            "Content-Type": "application/json"
        }

        # The V3 API needs a Resource-Id header (when enabled).
        if self.use_v3:
            headers["Resource-Id"] = self.resource_id

        # NOTE(review): the same payload shape is posted to both the V1 and
        # the V3 URL, and the reply is parsed as one JSON document either
        # way -- confirm the V3 endpoint accepts and returns this format.
        payload = {
            "app": {
                "appid": self.app_id,
                "token": self.access_token,
                "cluster": "volcano_tts"
            },
            "user": {
                "uid": "user_001"
            },
            "audio": {
                "voice_type": voice_type,
                "encoding": "mp3",
                "speed_ratio": 1.0,
                "volume_ratio": 1.0,
                "pitch_ratio": 1.0
            },
            "request": {
                # Random per-request id.
                "reqid": f"tts_{os.urandom(8).hex()}",
                "text": text,
                "text_type": "plain",
                "operation": "query"
            }
        }

        try:
            response = requests.post(self.tts_url, headers=headers, json=payload, timeout=30)

            # Print response metadata.
            print("\n📋 响应信息:")
            print(f" HTTP状态码: {response.status_code}")
            if 'X-Tt-Logid' in response.headers:
                print(f" RequestId: {response.headers['X-Tt-Logid']}")
            if 'X-Request-Id' in response.headers:
                print(f" X-Request-Id: {response.headers['X-Request-Id']}")

            data = response.json()

            # Print the response body, with the base64 audio replaced by a
            # placeholder so the terminal is not flooded.
            print("\n📄 完整响应:")
            print(json.dumps(self._redact_response(data), indent=2, ensure_ascii=False))
            print()

            if data.get("code") == 3000:
                # Success: decode the audio and save it.
                audio_data = base64.b64decode(data["data"])
                # Create the target directory first so an already completed
                # API call is not wasted on a missing folder.
                Path(output_file).parent.mkdir(parents=True, exist_ok=True)
                with open(output_file, "wb") as f:
                    f.write(audio_data)

                file_size = len(audio_data) / 1024  # KB
                print("✅ 语音合成成功!")
                print(f" 输出: {output_file} ({file_size:.1f} KB)")
                return output_file
            else:
                error_msg = data.get("message", "未知错误")
                reqid = data.get("reqid", "未知")
                # This is raised inside the try block, so the generic handler
                # below wraps it once more before it reaches the caller.
                raise Exception(f"TTS 失败\n 错误码: {data.get('code')}\n 错误信息: {error_msg}\n RequestId: {reqid}")

        except requests.exceptions.Timeout as exc:
            raise Exception("请求超时,请检查网络连接") from exc
        except Exception as exc:
            raise Exception(f"TTS 调用失败: {str(exc)}") from exc
|
||||
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the command line, prints the help text when no sub-command is
    given, and otherwise runs the requested sub-command. Any exception
    raised while doing so is reported on stderr and the process exits
    with status 1.
    """
    import argparse

    cli = argparse.ArgumentParser(description="豆包语音转换工具")
    commands = cli.add_subparsers(dest="command", help="选择功能")

    # "tts" sub-command: text to speech.
    tts_cmd = commands.add_parser("tts", help="文字转语音")
    tts_cmd.add_argument("text", help="要转换的文字")
    tts_cmd.add_argument(
        "-o",
        "--output",
        default="output.mp3",
        help="输出音频文件(默认: output.mp3)",
    )
    tts_cmd.add_argument(
        "-v",
        "--voice",
        default="BV700_V2_streaming",
        help="音色类型(默认: BV700_V2_streaming 通用女声)",
    )

    options = cli.parse_args()

    # No sub-command supplied: show usage and stop.
    if not options.command:
        cli.print_help()
        return

    try:
        converter = DoubaoVoiceConverter()

        if options.command == "tts":
            converter.text_to_speech(options.text, options.output, options.voice)

    except Exception as err:
        print(f"❌ 错误: {err}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user