tts-doubao.mjs 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. #!/usr/bin/env node
  2. /**
  3. * tts-doubao.mjs · 豆包语音 TTS(火山引擎 openspeech)
  4. *
  5. * 用法:
  6. * node scripts/tts-doubao.mjs --text "你好" --out demo.mp3
  7. * node scripts/tts-doubao.mjs --text-file script.txt --out out.mp3 --speed 1.0
  8. *
  9. * 输出:
  10. * - mp3 文件写到 --out 路径
  11. * - stdout 打印一行 JSON: {"path":"...","bytes":54321,"duration":12.34,"text_chars":2}(ffprobe 不可用时 duration 为 null)
  12. *
  13. * 依赖:Node 18+(自带 fetch/crypto)、ffprobe(测时长,brew install ffmpeg)
  14. *
  15. * env(自动从 skill 根目录 .env 读取,也可走 process.env 覆盖):
  16. * DOUBAO_TTS_API_KEY 必填
  17. * DOUBAO_TTS_VOICE_ID 必填(音色 id)
  18. * DOUBAO_TTS_CLUSTER 默认 volcano_icl
  19. * DOUBAO_TTS_ENDPOINT 默认 https://openspeech.bytedance.com/api/v1/tts
  20. */
  21. import fs from 'node:fs';
  22. import path from 'node:path';
  23. import { execFileSync } from 'node:child_process';
  24. import { fileURLToPath } from 'node:url';
  25. import { randomUUID } from 'node:crypto';
  // ES modules have no built-in __dirname, so derive it from this module's URL.
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
  // Parent of the directory holding this script; the default .env is looked up here.
  const SKILL_ROOT = path.resolve(__dirname, '..');

  /**
   * Load KEY=VALUE pairs from a dotenv-style file into process.env.
   *
   * Rules:
   * - A missing file is silently ignored.
   * - Blank lines, lines starting with '#', lines without '=', and lines with
   *   an empty key are skipped.
   * - A leading shell-style "export " prefix is dropped.
   * - One pair of matching surrounding quotes (single or double) is stripped
   *   from the value.
   * - Keys already present in process.env are never overwritten, so the real
   *   environment takes precedence over the file.
   *
   * @param {string} [envPath] File to read; defaults to `.env` under SKILL_ROOT.
   * @returns {void}
   */
  function loadEnv(envPath = path.join(SKILL_ROOT, '.env')) {
    if (!fs.existsSync(envPath)) return;

    const contents = fs.readFileSync(envPath, 'utf8');
    for (const rawLine of contents.split(/\r?\n/)) {
      const line = rawLine.trim().replace(/^export\s+/, '');
      if (!line || line.startsWith('#')) continue;

      const eqIndex = line.indexOf('=');
      if (eqIndex < 0) continue;

      const key = line.slice(0, eqIndex).trim();
      // An empty key ("=value") is meaningless and must not reach process.env.
      if (!key) continue;

      let value = line.slice(eqIndex + 1).trim();
      const quote = value[0];
      // Require at least two characters so that a lone quote character is kept
      // as a literal value instead of being treated as an empty quoted string.
      const isQuoted =
        value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote);
      if (isQuoted) value = value.slice(1, -1);

      if (!(key in process.env)) process.env[key] = value;
    }
  }

  // Populate process.env before any configuration is read further down.
  loadEnv();
  /**
   * Parse command-line arguments into an options object.
   *
   * Recognized value flags: --text, --text-file, --out, --speed, --voice,
   * --encoding. Each accepts either "--flag value" or "--flag=value".
   * --help / -h sets `help: true`. Unrecognized tokens are ignored.
   *
   * A value flag that appears as the last token (no value follows) leaves the
   * current setting untouched, so defaults such as speed '1.0' are not
   * replaced by undefined.
   *
   * @param {string[]} argv Full argv array; parsing starts at index 2.
   * @returns {{speed: string, encoding: string, text?: string, textFile?: string,
   *            out?: string, voice?: string, help?: boolean}}
   */
  function parseArgs(argv) {
    const args = { speed: '1.0', encoding: 'mp3' };
    // Maps each value-taking flag to the property it fills in.
    const valueFlags = new Map([
      ['--text', 'text'],
      ['--text-file', 'textFile'],
      ['--out', 'out'],
      ['--speed', 'speed'],
      ['--voice', 'voice'],
      ['--encoding', 'encoding'],
    ]);

    for (let i = 2; i < argv.length; i++) {
      const token = argv[i];
      if (token === '--help' || token === '-h') {
        args.help = true;
        continue;
      }

      // Split "--flag=value" on the first '=' only, so values may contain '='.
      const eqIndex = token.startsWith('--') ? token.indexOf('=') : -1;
      const flag = eqIndex === -1 ? token : token.slice(0, eqIndex);
      const property = valueFlags.get(flag);
      if (property === undefined) continue;

      const value = eqIndex === -1 ? argv[++i] : token.slice(eqIndex + 1);
      if (value !== undefined) args[property] = value;
    }
    return args;
  }
  /**
   * Print the command-line help text to stderr and terminate the process
   * with exit code 1.
   */
  function usage() {
    const helpLines = [
      'tts-doubao.mjs · 豆包语音 TTS',
      '--text <str> 要合成的文本',
      '--text-file <path> 从文件读取文本(与 --text 二选一)',
      '--out <path> 输出 mp3 路径(必填)',
      '--speed <float> 语速倍率,默认 1.0(0.5-2.0)',
      '--voice <voice_id> 覆盖 .env 里的音色 id',
      '--encoding <ext> mp3 / wav / pcm,默认 mp3',
    ];
    console.error(helpLines.join('\n'));
    process.exit(1);
  }
  /**
   * Measure the duration of a media file by running `ffprobe`.
   *
   * @param {string} filePath File to probe.
   * @returns {number|null} parseFloat of ffprobe's trimmed output, or null
   *   when running ffprobe throws (for example when it is not installed or
   *   exits with an error).
   */
  function getDuration(filePath) {
    // Ask ffprobe for only the container duration, printed as a bare value.
    const probeArgs = [
      '-v', 'error',
      '-show_entries', 'format=duration',
      '-of', 'default=noprint_wrappers=1:nokey=1',
      filePath,
    ];

    let stdout;
    try {
      stdout = execFileSync('ffprobe', probeArgs, { encoding: 'utf8' });
    } catch {
      // Best effort: duration is optional, so a probe failure is not fatal.
      return null;
    }
    return parseFloat(stdout.trim());
  }
  /**
   * Synthesize speech for `text` through the configured TTS HTTP endpoint.
   *
   * Configuration is read from process.env:
   * - DOUBAO_TTS_API_KEY   required, sent as the `x-api-key` header
   * - DOUBAO_TTS_VOICE_ID  required unless `voice` is passed
   * - DOUBAO_TTS_CLUSTER   defaults to 'volcano_icl'
   * - DOUBAO_TTS_ENDPOINT  defaults to https://openspeech.bytedance.com/api/v1/tts
   *
   * @param {object} options
   * @param {string} options.text Text to synthesize; must be non-empty.
   * @param {string} [options.voice] Voice id overriding DOUBAO_TTS_VOICE_ID.
   * @param {string|number} [options.speed='1.0'] Speed ratio; must parse to a
   *   positive finite number.
   * @param {string} [options.encoding='mp3'] Audio encoding sent to the API.
   * @returns {Promise<Buffer>} Audio bytes decoded from the response's base64 `data`.
   * @throws {Error} On missing configuration, empty text, invalid speed, a
   *   non-OK HTTP status, a body that is not a JSON object, a `code` other
   *   than 3000, or a response without `data`.
   */
  async function tts({ text, voice, speed = '1.0', encoding = 'mp3' }) {
    const apiKey = process.env.DOUBAO_TTS_API_KEY;
    const cluster = process.env.DOUBAO_TTS_CLUSTER || 'volcano_icl';
    const endpoint = process.env.DOUBAO_TTS_ENDPOINT || 'https://openspeech.bytedance.com/api/v1/tts';
    const voiceId = voice || process.env.DOUBAO_TTS_VOICE_ID;
    if (!apiKey) throw new Error('缺 DOUBAO_TTS_API_KEY(检查 .env)');
    if (!voiceId) throw new Error('缺 DOUBAO_TTS_VOICE_ID(检查 .env 或用 --voice 传)');
    if (!text) throw new Error('缺要合成的文本');

    // NaN would be serialized as null by JSON.stringify, so reject it here
    // with a clear message instead of sending a malformed request.
    const speedRatio = Number.parseFloat(speed);
    if (!Number.isFinite(speedRatio) || speedRatio <= 0) {
      throw new Error(`--speed 必须是正数,收到:${speed}`);
    }

    const body = {
      app: { cluster },
      user: { uid: 'huashu-design' },
      audio: {
        voice_type: voiceId,
        encoding,
        speed_ratio: speedRatio,
      },
      request: {
        reqid: randomUUID(),
        text,
        operation: 'query',
      },
    };

    const res = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'x-api-key': apiKey,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
    });

    // Read the body once as text so it can be quoted in any error below.
    const rawBody = await res.text();
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}: ${rawBody.slice(0, 500)}`);
    }

    let json;
    try {
      json = JSON.parse(rawBody);
    } catch (err) {
      throw new Error(`API 响应不是合法 JSON:${rawBody.slice(0, 500)}`, { cause: err });
    }
    if (json === null || typeof json !== 'object') {
      throw new Error(`API 响应不是合法 JSON:${rawBody.slice(0, 500)}`);
    }

    // Per the original author's note, the standard response looks like
    // { code, message, data: "<base64 audio>", ... } and code === 3000 means success.
    if (json.code !== undefined && json.code !== 3000) {
      throw new Error(`API 返回错误 code=${json.code} msg=${json.message || JSON.stringify(json)}`);
    }
    if (!json.data) {
      throw new Error(`API 响应无 data 字段:${JSON.stringify(json).slice(0, 500)}`);
    }
    return Buffer.from(json.data, 'base64');
  }
  /**
   * CLI entry point: parse arguments, resolve the text to synthesize, call
   * the TTS service, write the audio to --out, and print a one-line JSON
   * summary (path, bytes, duration, text_chars) on stdout.
   *
   * Missing text or a missing --out prints an error and the usage text.
   */
  async function main() {
    const cli = parseArgs(process.argv);
    if (cli.help) usage();

    // Inline --text wins; the file is only read when no inline text was given.
    const text = cli.text || (cli.textFile ? fs.readFileSync(cli.textFile, 'utf8').trim() : cli.text);
    if (!text) {
      console.error('错:缺 --text 或 --text-file');
      usage();
    }
    if (!cli.out) {
      console.error('错:缺 --out');
      usage();
    }

    // Create the destination directory up front so the write cannot fail on a missing parent.
    const outPath = path.resolve(cli.out);
    fs.mkdirSync(path.dirname(outPath), { recursive: true });

    const { voice, speed, encoding } = cli;
    const audio = await tts({ text, voice, speed, encoding });
    fs.writeFileSync(outPath, audio);

    const summary = {
      path: outPath,
      bytes: audio.length,
      duration: getDuration(outPath),
      text_chars: text.length,
    };
    console.log(JSON.stringify(summary));
  }
  // Run the CLI; any rejection is reported on stderr and turned into exit code 1.
  main().then(undefined, (failure) => {
    console.error(`TTS 失败:${failure.message}`);
    process.exit(1);
  });