Files
ai-proj-helper/skills-integration/doubao-voice-plugin/scripts/voice_converter.py
John Qiu 712063071c refactor: 通用技能按类别拆分为独立目录
skills/ → skills-dev(9), skills-req(10), skills-ops(4),
skills-integration(8), skills-biz(4), skills-workflow(7)

generate-marketplace.py 改为自动扫描所有 skills-* 目录。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 11:31:58 +10:30

172 lines
5.5 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
豆包语音转换工具
支持:文字转语音 (TTS)
"""
import os
import sys
import json
import base64
import requests
from pathlib import Path
class DoubaoVoiceConverter:
    """Doubao speech-synthesis (TTS) client configured from environment variables.

    Required environment variables:
        DOUBAO_APP_ID: application id, sent as ``app.appid`` in the request.
        DOUBAO_ACCESS_TOKEN: access token, sent both in the ``Authorization``
            header and as ``app.token`` in the request.

    Optional environment variables:
        DOUBAO_USE_V3: ``"true"`` (case-insensitive) selects the V3 endpoint;
            any other value (or unset) selects the V1 endpoint.
        DOUBAO_RESOURCE_ID: resource id sent with V3 requests
            (default ``volc.bigmodel.tts``).
    """

    # Response ``code`` value this client treats as a successful synthesis.
    _SUCCESS_CODE = 3000
    # Seconds to wait for the HTTP request before giving up.
    _REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        """Read credentials and endpoint selection from the environment.

        Raises:
            ValueError: if DOUBAO_APP_ID or DOUBAO_ACCESS_TOKEN is unset or empty.
        """
        self.app_id = os.environ.get("DOUBAO_APP_ID")
        self.access_token = os.environ.get("DOUBAO_ACCESS_TOKEN")
        if not self.app_id or not self.access_token:
            raise ValueError(
                "请先设置环境变量:\n"
                "export DOUBAO_APP_ID='your_app_id'\n"
                "export DOUBAO_ACCESS_TOKEN='your_access_token'"
            )

        # API version: V1 (default, basic voices) or V3 (opt-in via DOUBAO_USE_V3).
        self.use_v3 = os.environ.get("DOUBAO_USE_V3", "false").lower() == "true"
        if self.use_v3:
            # NOTE(review): requests to this endpoint reuse the V1 payload and
            # response handling below; the V3 API presumably expects its own
            # request/response format -- confirm against the vendor docs
            # before relying on DOUBAO_USE_V3.
            self.tts_url = "https://openspeech.bytedance.com/api/v3/tts/unidirectional"
            self.resource_id = os.environ.get("DOUBAO_RESOURCE_ID", "volc.bigmodel.tts")
        else:
            self.tts_url = "https://openspeech.bytedance.com/api/v1/tts"

    def _build_headers(self) -> dict:
        """Return the HTTP headers for a TTS request."""
        headers = {
            # The ';' separator after "Bearer" is kept exactly as the original
            # client sent it (presumably the format the vendor API requires --
            # do not "fix" it to a space without checking the API docs).
            "Authorization": f"Bearer;{self.access_token}",
            "Content-Type": "application/json",
        }
        if self.use_v3:
            headers["Resource-Id"] = self.resource_id
        return headers

    def _build_payload(self, text: str, voice_type: str) -> dict:
        """Return the JSON request body for synthesising ``text`` with ``voice_type``."""
        return {
            "app": {
                "appid": self.app_id,
                "token": self.access_token,
                "cluster": "volcano_tts",
            },
            "user": {
                "uid": "user_001",
            },
            "audio": {
                "voice_type": voice_type,
                "encoding": "mp3",
                "speed_ratio": 1.0,
                "volume_ratio": 1.0,
                "pitch_ratio": 1.0,
            },
            "request": {
                # Random per-request id so individual calls can be told apart.
                "reqid": f"tts_{os.urandom(8).hex()}",
                "text": text,
                "text_type": "plain",
                "operation": "query",
            },
        }

    @staticmethod
    def _printable_response(data):
        """Return a copy of ``data`` with the base64 audio replaced by a short summary.

        The audio payload can be hundreds of kilobytes of base64; dumping it
        to the terminal buries the fields that are actually useful for
        debugging (code, message, reqid).
        """
        if isinstance(data, dict) and isinstance(data.get("data"), str):
            summary = dict(data)
            summary["data"] = f"<base64 音频数据, {len(data['data'])} 字符>"
            return summary
        return data

    def text_to_speech(
        self,
        text: str,
        output_file: str = "output.mp3",
        voice_type: str = "BV700_V2_streaming"
    ) -> str:
        """Synthesise ``text`` to an MP3 file and return the output path.

        Args:
            text: text to convert; must contain at least one non-blank character.
            output_file: path of the audio file to write. Missing parent
                directories are created.
            voice_type: voice identifier, e.g.
                - BV700_V2_streaming: general female voice (recommended)
                - BV701_V2_streaming: general male voice
                - BV406_streaming: gentle female voice
                - BV158_streaming: lively female voice
                - BV115_streaming: magnetic male voice

        Returns:
            ``output_file``, unchanged, once the audio has been written.

        Raises:
            ValueError: if ``text`` is empty or whitespace-only.
            Exception: on timeout, transport failure, an unparsable response,
                an API error code, or a failure to write the output file.
                The underlying error, if any, is chained as ``__cause__``.
        """
        # Reject empty input locally instead of spending a network round trip.
        if not text or not text.strip():
            raise ValueError("要转换的文字不能为空")

        print("📝 文字转语音中...")
        print(f" 文字: {text[:50]}{'...' if len(text) > 50 else ''}")
        print(f" 音色: {voice_type}")

        try:
            response = requests.post(
                self.tts_url,
                headers=self._build_headers(),
                json=self._build_payload(text, voice_type),
                timeout=self._REQUEST_TIMEOUT_SECONDS,
            )
        except requests.exceptions.Timeout as exc:
            raise Exception("请求超时,请检查网络连接") from exc
        except requests.exceptions.RequestException as exc:
            raise Exception(f"TTS 调用失败: {exc}") from exc

        # Response metadata: the request ids help when reporting problems.
        print("\n📋 响应信息:")
        print(f" HTTP状态码: {response.status_code}")
        if 'X-Tt-Logid' in response.headers:
            print(f" RequestId: {response.headers['X-Tt-Logid']}")
        if 'X-Request-Id' in response.headers:
            print(f" X-Request-Id: {response.headers['X-Request-Id']}")

        try:
            data = response.json()
        except ValueError as exc:
            # Error pages (e.g. from a gateway) are not JSON; report the
            # status code instead of an opaque decoder message.
            raise Exception(
                f"TTS 调用失败: 响应不是有效的 JSON (HTTP {response.status_code})"
            ) from exc
        if not isinstance(data, dict):
            raise Exception(
                f"TTS 调用失败: 响应格式异常 (HTTP {response.status_code})"
            )

        print("\n📄 完整响应:")
        print(json.dumps(self._printable_response(data), indent=2, ensure_ascii=False))
        print()

        # Raised outside any try block so the API error is not caught and
        # re-wrapped with a second "TTS 调用失败" prefix.
        if data.get("code") != self._SUCCESS_CODE:
            error_msg = data.get("message", "未知错误")
            reqid = data.get("reqid", "未知")
            raise Exception(
                f"TTS 失败\n 错误码: {data.get('code')}\n 错误信息: {error_msg}\n RequestId: {reqid}"
            )

        try:
            audio_data = base64.b64decode(data["data"])
        except (KeyError, TypeError, ValueError) as exc:
            raise Exception(f"TTS 调用失败: 响应中缺少有效的音频数据 ({exc})") from exc

        destination = Path(output_file)
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(audio_data)
        except OSError as exc:
            raise Exception(f"TTS 调用失败: {exc}") from exc

        file_size = len(audio_data) / 1024  # KB
        print("✅ 语音合成成功!")
        print(f" 输出: {output_file} ({file_size:.1f} KB)")
        return output_file
def main():
    """Command-line entry point.

    Parses ``sys.argv``. With no sub-command, prints the help text and
    returns. The ``tts`` sub-command converts the positional ``text`` into
    the file given by ``-o/--output`` using the voice given by ``-v/--voice``.

    Any exception raised while creating the converter or synthesising is
    printed to stderr and the process exits with status 1.
    """
    import argparse

    parser = argparse.ArgumentParser(description="豆包语音转换工具")
    subparsers = parser.add_subparsers(dest="command", help="选择功能")

    # "tts" sub-command
    tts_parser = subparsers.add_parser("tts", help="文字转语音")
    tts_parser.add_argument("text", help="要转换的文字")
    tts_parser.add_argument(
        "-o", "--output", default="output.mp3",
        # The closing parenthesis was missing from this help text.
        help="输出音频文件(默认: output.mp3)",
    )
    tts_parser.add_argument(
        "-v", "--voice", default="BV700_V2_streaming",
        help="音色类型(默认: BV700_V2_streaming 通用女声)",
    )

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        return

    # Top-level boundary: turn any failure into a message plus a non-zero
    # exit status rather than a traceback.
    try:
        converter = DoubaoVoiceConverter()
        if args.command == "tts":
            converter.text_to_speech(args.text, args.output, args.voice)
    except Exception as e:
        print(f"❌ 错误: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()