| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242 |
- #!/usr/bin/env node
- /**
- * tts-doubao.mjs · 豆包语音 TTS(火山引擎 openspeech)
- *
- * 用法:
- * node scripts/tts-doubao.mjs --text "你好" --out demo.mp3
- * node scripts/tts-doubao.mjs --text-file script.txt --out out.mp3 --speed 1.0
- *
- * 输出:
- * - mp3 文件写到 --out 路径
- * - stdout 打印一行 JSON: {"path":"...","duration":12.34,"bytes":54321}
- *
- * 依赖:Node 18+(自带 fetch/crypto)、ffprobe(测时长,brew install ffmpeg)
- *
- * env(自动从 skill 根目录 .env 读取,也可走 process.env 覆盖):
- * DOUBAO_TTS_API_KEY 可选(新版 API Key 鉴权)
- * DOUBAO_APP_ID 可选(控制台 App ID,与 DOUBAO_ACCESS_KEY 配套)
- * DOUBAO_ACCESS_KEY 可选(控制台 Access Token,与 DOUBAO_APP_ID 配套)
- * DOUBAO_TTS_VOICE_ID 必填(音色 id)
- * DOUBAO_TTS_RESOURCE_ID 可选(默认按音色自动推断)
- * DOUBAO_TTS_ENDPOINT 默认 https://openspeech.bytedance.com/api/v3/tts/unidirectional
- */
- import fs from 'node:fs';
- import path from 'node:path';
- import { execFileSync } from 'node:child_process';
- import { fileURLToPath } from 'node:url';
- import { randomUUID } from 'node:crypto';
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
- const SKILL_ROOT = path.resolve(__dirname, '..');
- function loadEnv() {
- const envPath = path.join(SKILL_ROOT, '.env');
- if (!fs.existsSync(envPath)) return;
- const text = fs.readFileSync(envPath, 'utf8');
- for (const line of text.split('\n')) {
- const trimmed = line.trim();
- if (!trimmed || trimmed.startsWith('#')) continue;
- const idx = trimmed.indexOf('=');
- if (idx < 0) continue;
- const key = trimmed.slice(0, idx).trim();
- let val = trimmed.slice(idx + 1).trim();
- if ((val.startsWith('"') && val.endsWith('"')) || (val.startsWith("'") && val.endsWith("'"))) {
- val = val.slice(1, -1);
- }
- if (!(key in process.env)) process.env[key] = val;
- }
- }
- loadEnv();
- function parseArgs(argv) {
- const args = { speed: '1.0', encoding: 'mp3' };
- for (let i = 2; i < argv.length; i++) {
- const a = argv[i];
- if (a === '--text') args.text = argv[++i];
- else if (a === '--text-file') args.textFile = argv[++i];
- else if (a === '--out') args.out = argv[++i];
- else if (a === '--speed') args.speed = argv[++i];
- else if (a === '--voice') args.voice = argv[++i];
- else if (a === '--encoding') args.encoding = argv[++i];
- else if (a === '--help' || a === '-h') args.help = true;
- }
- return args;
- }
- function usage() {
- console.error(`
- tts-doubao.mjs · 豆包语音 TTS
- --text <str> 要合成的文本
- --text-file <path> 从文件读取文本(与 --text 二选一)
- --out <path> 输出 mp3 路径(必填)
- --speed <float> 语速倍率,默认 1.0(0.5-2.0)
- --voice <voice_id> 覆盖 .env 里的音色 id
- --encoding <ext> mp3 / wav / pcm,默认 mp3
- `.trim());
- process.exit(1);
- }
- function getDuration(filePath) {
- try {
- const out = execFileSync('ffprobe', [
- '-v', 'error',
- '-show_entries', 'format=duration',
- '-of', 'default=noprint_wrappers=1:nokey=1',
- filePath,
- ], { encoding: 'utf8' });
- return parseFloat(out.trim());
- } catch (e) {
- return null;
- }
- }
- function inferResourceId(voiceId) {
- if (voiceId.startsWith('S_')) return 'seed-icl-1.0';
- if (voiceId.includes('uranus')) return 'seed-tts-2.0';
- return 'seed-tts-1.0';
- }
- function speedToSpeechRate(speed) {
- const ratio = parseFloat(speed);
- if (!Number.isFinite(ratio)) return 0;
- return Math.max(-50, Math.min(100, Math.round((ratio - 1) * 100)));
- }
- function buildAuthHeaders({ requestId, resourceId }) {
- const apiKey = process.env.DOUBAO_TTS_API_KEY;
- const appId = process.env.DOUBAO_APP_ID;
- const accessKey = process.env.DOUBAO_ACCESS_KEY;
- const headers = {
- 'Content-Type': 'application/json',
- 'X-Api-Resource-Id': resourceId,
- 'X-Api-Request-Id': requestId,
- };
- if (apiKey) {
- headers['X-Api-Key'] = apiKey;
- return headers;
- }
- if (!appId) throw new Error('缺 DOUBAO_TTS_API_KEY 或 DOUBAO_APP_ID(检查 .env)');
- if (!accessKey) throw new Error('缺 DOUBAO_ACCESS_KEY(检查 .env)');
- headers['X-Api-App-Id'] = appId;
- headers['X-Api-Access-Key'] = accessKey;
- return headers;
- }
- async function readV3Audio(res) {
- const text = await res.text();
- const chunks = [];
- let finalCode = null;
- let finalMessage = '';
- for (const line of text.split(/\r?\n/)) {
- const trimmed = line.trim();
- if (!trimmed) continue;
- let json;
- try {
- json = JSON.parse(trimmed);
- } catch (e) {
- throw new Error(`API 响应行不是 JSON:${trimmed.slice(0, 200)}`);
- }
- const code = json.code ?? 0;
- if (code === 20000000) {
- finalCode = code;
- finalMessage = json.message || '';
- break;
- }
- if (code !== 0) {
- throw new Error(`API 返回错误 code=${code} msg=${json.message || JSON.stringify(json)}`);
- }
- if (json.data) chunks.push(Buffer.from(json.data, 'base64'));
- }
- if (!chunks.length) {
- const detail = finalCode ? `结束码 ${finalCode} ${finalMessage}` : text.slice(0, 500);
- throw new Error(`API 响应无音频数据:${detail}`);
- }
- return Buffer.concat(chunks);
- }
- async function tts({ text, voice, speed, encoding }) {
- const endpoint = process.env.DOUBAO_TTS_ENDPOINT || 'https://openspeech.bytedance.com/api/v3/tts/unidirectional';
- const voiceId = voice || process.env.DOUBAO_TTS_VOICE_ID || process.env.DOUBAO_SPEAKER;
- const resourceId = process.env.DOUBAO_TTS_RESOURCE_ID || inferResourceId(voiceId || '');
- const requestId = randomUUID();
- if (!voiceId) throw new Error('缺 DOUBAO_TTS_VOICE_ID(检查 .env 或用 --voice 传)');
- const body = {
- user: { uid: 'huashu-design' },
- req_params: {
- text,
- speaker: voiceId,
- audio_params: {
- format: encoding,
- sample_rate: 24000,
- speech_rate: speedToSpeechRate(speed),
- },
- },
- };
- const res = await fetch(endpoint, {
- method: 'POST',
- headers: buildAuthHeaders({ requestId, resourceId }),
- body: JSON.stringify(body),
- });
- if (!res.ok) {
- const errText = await res.text();
- throw new Error(`HTTP ${res.status}: ${errText.slice(0, 500)}`);
- }
- return readV3Audio(res);
- }
- async function main() {
- const args = parseArgs(process.argv);
- if (args.help) usage();
- let text = args.text;
- if (!text && args.textFile) {
- text = fs.readFileSync(args.textFile, 'utf8').trim();
- }
- if (!text) {
- console.error('错:缺 --text 或 --text-file');
- usage();
- }
- if (!args.out) {
- console.error('错:缺 --out');
- usage();
- }
- const outPath = path.resolve(args.out);
- fs.mkdirSync(path.dirname(outPath), { recursive: true });
- const audio = await tts({
- text,
- voice: args.voice,
- speed: args.speed,
- encoding: args.encoding,
- });
- fs.writeFileSync(outPath, audio);
- const duration = getDuration(outPath);
- const result = {
- path: outPath,
- bytes: audio.length,
- duration,
- text_chars: text.length,
- };
- console.log(JSON.stringify(result));
- }
- main().catch((err) => {
- console.error(`TTS 失败:${err.message}`);
- process.exit(1);
- });
|