skills/ → skills-dev(9), skills-req(10), skills-ops(4), skills-integration(8), skills-biz(4), skills-workflow(7) generate-marketplace.py 改为自动扫描所有 skills-* 目录。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
172 lines
5.5 KiB
Python
Executable File
172 lines
5.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
豆包语音转换工具
|
||
支持:文字转语音 (TTS)
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import json
|
||
import base64
|
||
import requests
|
||
from pathlib import Path
|
||
|
||
|
||
class DoubaoVoiceConverter:
|
||
"""豆包语音转换工具类"""
|
||
|
||
def __init__(self):
|
||
# 从环境变量读取配置
|
||
self.app_id = os.environ.get("DOUBAO_APP_ID")
|
||
self.access_token = os.environ.get("DOUBAO_ACCESS_TOKEN")
|
||
|
||
if not self.app_id or not self.access_token:
|
||
raise ValueError(
|
||
"请先设置环境变量:\n"
|
||
"export DOUBAO_APP_ID='your_app_id'\n"
|
||
"export DOUBAO_ACCESS_TOKEN='your_access_token'"
|
||
)
|
||
|
||
# API版本选择: V1 (默认, 支持基础音色) 或 V3 (豆包2.0, 需额外配置)
|
||
self.use_v3 = os.environ.get("DOUBAO_USE_V3", "false").lower() == "true"
|
||
|
||
if self.use_v3:
|
||
self.tts_url = "https://openspeech.bytedance.com/api/v3/tts/unidirectional"
|
||
self.resource_id = os.environ.get("DOUBAO_RESOURCE_ID", "volc.bigmodel.tts")
|
||
else:
|
||
# V1 API - 稳定可用,支持基础音色
|
||
self.tts_url = "https://openspeech.bytedance.com/api/v1/tts"
|
||
|
||
def text_to_speech(
|
||
self,
|
||
text: str,
|
||
output_file: str = "output.mp3",
|
||
voice_type: str = "BV700_V2_streaming"
|
||
) -> str:
|
||
"""
|
||
文字转语音 (TTS)
|
||
|
||
Args:
|
||
text: 要转换的文字
|
||
output_file: 输出音频文件路径
|
||
voice_type: 音色类型
|
||
- BV700_V2_streaming: 通用女声(推荐)
|
||
- BV701_V2_streaming: 通用男声
|
||
- BV406_streaming: 温柔女声
|
||
- BV158_streaming: 活泼女声
|
||
- BV115_streaming: 磁性男声
|
||
|
||
Returns:
|
||
str: 输出文件路径
|
||
"""
|
||
print(f"📝 文字转语音中...")
|
||
print(f" 文字: {text[:50]}{'...' if len(text) > 50 else ''}")
|
||
print(f" 音色: {voice_type}")
|
||
|
||
headers = {
|
||
"Authorization": f"Bearer;{self.access_token}",
|
||
"Content-Type": "application/json"
|
||
}
|
||
|
||
# V3 API需要Resource-Id (如果启用)
|
||
if self.use_v3:
|
||
headers["Resource-Id"] = self.resource_id
|
||
|
||
payload = {
|
||
"app": {
|
||
"appid": self.app_id,
|
||
"token": self.access_token,
|
||
"cluster": "volcano_tts"
|
||
},
|
||
"user": {
|
||
"uid": "user_001"
|
||
},
|
||
"audio": {
|
||
"voice_type": voice_type,
|
||
"encoding": "mp3",
|
||
"speed_ratio": 1.0,
|
||
"volume_ratio": 1.0,
|
||
"pitch_ratio": 1.0
|
||
},
|
||
"request": {
|
||
"reqid": f"tts_{os.urandom(8).hex()}",
|
||
"text": text,
|
||
"text_type": "plain",
|
||
"operation": "query"
|
||
}
|
||
}
|
||
|
||
try:
|
||
response = requests.post(self.tts_url, headers=headers, json=payload, timeout=30)
|
||
|
||
# 打印响应头信息
|
||
print(f"\n📋 响应信息:")
|
||
print(f" HTTP状态码: {response.status_code}")
|
||
if 'X-Tt-Logid' in response.headers:
|
||
print(f" RequestId: {response.headers['X-Tt-Logid']}")
|
||
if 'X-Request-Id' in response.headers:
|
||
print(f" X-Request-Id: {response.headers['X-Request-Id']}")
|
||
|
||
data = response.json()
|
||
|
||
# 打印完整响应
|
||
print(f"\n📄 完整响应:")
|
||
print(json.dumps(data, indent=2, ensure_ascii=False))
|
||
print()
|
||
|
||
if data.get("code") == 3000:
|
||
# 成功:解码并保存音频
|
||
audio_data = base64.b64decode(data["data"])
|
||
with open(output_file, "wb") as f:
|
||
f.write(audio_data)
|
||
|
||
file_size = len(audio_data) / 1024 # KB
|
||
print(f"✅ 语音合成成功!")
|
||
print(f" 输出: {output_file} ({file_size:.1f} KB)")
|
||
return output_file
|
||
else:
|
||
error_msg = data.get("message", "未知错误")
|
||
reqid = data.get("reqid", "未知")
|
||
raise Exception(f"TTS 失败\n 错误码: {data.get('code')}\n 错误信息: {error_msg}\n RequestId: {reqid}")
|
||
|
||
except requests.exceptions.Timeout:
|
||
raise Exception("请求超时,请检查网络连接")
|
||
except Exception as e:
|
||
raise Exception(f"TTS 调用失败: {str(e)}")
|
||
|
||
|
||
|
||
def main():
|
||
"""命令行工具"""
|
||
import argparse
|
||
|
||
parser = argparse.ArgumentParser(description="豆包语音转换工具")
|
||
subparsers = parser.add_subparsers(dest="command", help="选择功能")
|
||
|
||
# TTS 命令
|
||
tts_parser = subparsers.add_parser("tts", help="文字转语音")
|
||
tts_parser.add_argument("text", help="要转换的文字")
|
||
tts_parser.add_argument("-o", "--output", default="output.mp3", help="输出音频文件(默认: output.mp3)")
|
||
tts_parser.add_argument("-v", "--voice", default="BV700_V2_streaming",
|
||
help="音色类型(默认: BV700_V2_streaming 通用女声)")
|
||
|
||
args = parser.parse_args()
|
||
|
||
if not args.command:
|
||
parser.print_help()
|
||
return
|
||
|
||
try:
|
||
converter = DoubaoVoiceConverter()
|
||
|
||
if args.command == "tts":
|
||
converter.text_to_speech(args.text, args.output, args.voice)
|
||
|
||
except Exception as e:
|
||
print(f"❌ 错误: {e}", file=sys.stderr)
|
||
sys.exit(1)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|