| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- #!/usr/bin/env node
- /**
- * tts-doubao.mjs · 豆包语音 TTS(火山引擎 openspeech)
- *
- * 用法:
- * node scripts/tts-doubao.mjs --text "你好" --out demo.mp3
- * node scripts/tts-doubao.mjs --text-file script.txt --out out.mp3 --speed 1.0
- *
- * 输出:
- * - mp3 文件写到 --out 路径
- * - stdout 打印一行 JSON: {"path":"...","duration":12.34,"bytes":54321}
- *
- * 依赖:Node 18+(自带 fetch/crypto)、ffprobe(测时长,brew install ffmpeg)
- *
- * env(自动从 skill 根目录 .env 读取,也可走 process.env 覆盖):
- * DOUBAO_TTS_API_KEY 必填
- * DOUBAO_TTS_VOICE_ID 必填(音色 id)
- * DOUBAO_TTS_CLUSTER 默认 volcano_icl
- * DOUBAO_TTS_ENDPOINT 默认 https://openspeech.bytedance.com/api/v1/tts
- */
- import fs from 'node:fs';
- import path from 'node:path';
- import { execFileSync } from 'node:child_process';
- import { fileURLToPath } from 'node:url';
- import { randomUUID } from 'node:crypto';
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
- const SKILL_ROOT = path.resolve(__dirname, '..');
- function loadEnv() {
- const envPath = path.join(SKILL_ROOT, '.env');
- if (!fs.existsSync(envPath)) return;
- const text = fs.readFileSync(envPath, 'utf8');
- for (const line of text.split('\n')) {
- const trimmed = line.trim();
- if (!trimmed || trimmed.startsWith('#')) continue;
- const idx = trimmed.indexOf('=');
- if (idx < 0) continue;
- const key = trimmed.slice(0, idx).trim();
- let val = trimmed.slice(idx + 1).trim();
- if ((val.startsWith('"') && val.endsWith('"')) || (val.startsWith("'") && val.endsWith("'"))) {
- val = val.slice(1, -1);
- }
- if (!(key in process.env)) process.env[key] = val;
- }
- }
- loadEnv();
- function parseArgs(argv) {
- const args = { speed: '1.0', encoding: 'mp3' };
- for (let i = 2; i < argv.length; i++) {
- const a = argv[i];
- if (a === '--text') args.text = argv[++i];
- else if (a === '--text-file') args.textFile = argv[++i];
- else if (a === '--out') args.out = argv[++i];
- else if (a === '--speed') args.speed = argv[++i];
- else if (a === '--voice') args.voice = argv[++i];
- else if (a === '--encoding') args.encoding = argv[++i];
- else if (a === '--help' || a === '-h') args.help = true;
- }
- return args;
- }
- function usage() {
- console.error(`
- tts-doubao.mjs · 豆包语音 TTS
- --text <str> 要合成的文本
- --text-file <path> 从文件读取文本(与 --text 二选一)
- --out <path> 输出 mp3 路径(必填)
- --speed <float> 语速倍率,默认 1.0(0.5-2.0)
- --voice <voice_id> 覆盖 .env 里的音色 id
- --encoding <ext> mp3 / wav / pcm,默认 mp3
- `.trim());
- process.exit(1);
- }
- function getDuration(filePath) {
- try {
- const out = execFileSync('ffprobe', [
- '-v', 'error',
- '-show_entries', 'format=duration',
- '-of', 'default=noprint_wrappers=1:nokey=1',
- filePath,
- ], { encoding: 'utf8' });
- return parseFloat(out.trim());
- } catch (e) {
- return null;
- }
- }
- async function tts({ text, voice, speed, encoding }) {
- const apiKey = process.env.DOUBAO_TTS_API_KEY;
- const cluster = process.env.DOUBAO_TTS_CLUSTER || 'volcano_icl';
- const endpoint = process.env.DOUBAO_TTS_ENDPOINT || 'https://openspeech.bytedance.com/api/v1/tts';
- const voiceId = voice || process.env.DOUBAO_TTS_VOICE_ID;
- if (!apiKey) throw new Error('缺 DOUBAO_TTS_API_KEY(检查 .env)');
- if (!voiceId) throw new Error('缺 DOUBAO_TTS_VOICE_ID(检查 .env 或用 --voice 传)');
- const body = {
- app: { cluster },
- user: { uid: 'huashu-design' },
- audio: {
- voice_type: voiceId,
- encoding,
- speed_ratio: parseFloat(speed),
- },
- request: {
- reqid: randomUUID(),
- text,
- operation: 'query',
- },
- };
- const res = await fetch(endpoint, {
- method: 'POST',
- headers: {
- 'x-api-key': apiKey,
- 'Content-Type': 'application/json',
- },
- body: JSON.stringify(body),
- });
- if (!res.ok) {
- const errText = await res.text();
- throw new Error(`HTTP ${res.status}: ${errText.slice(0, 500)}`);
- }
- const json = await res.json();
- // 豆包标准返回:{ code, message, data: "<base64 audio>", ... }
- // code === 3000 表示成功
- if (json.code !== undefined && json.code !== 3000) {
- throw new Error(`API 返回错误 code=${json.code} msg=${json.message || JSON.stringify(json)}`);
- }
- if (!json.data) {
- throw new Error(`API 响应无 data 字段:${JSON.stringify(json).slice(0, 500)}`);
- }
- return Buffer.from(json.data, 'base64');
- }
- async function main() {
- const args = parseArgs(process.argv);
- if (args.help) usage();
- let text = args.text;
- if (!text && args.textFile) {
- text = fs.readFileSync(args.textFile, 'utf8').trim();
- }
- if (!text) {
- console.error('错:缺 --text 或 --text-file');
- usage();
- }
- if (!args.out) {
- console.error('错:缺 --out');
- usage();
- }
- const outPath = path.resolve(args.out);
- fs.mkdirSync(path.dirname(outPath), { recursive: true });
- const audio = await tts({
- text,
- voice: args.voice,
- speed: args.speed,
- encoding: args.encoding,
- });
- fs.writeFileSync(outPath, audio);
- const duration = getDuration(outPath);
- const result = {
- path: outPath,
- bytes: audio.length,
- duration,
- text_chars: text.length,
- };
- console.log(JSON.stringify(result));
- }
- main().catch((err) => {
- console.error(`TTS 失败:${err.message}`);
- process.exit(1);
- });
|