1
0

tts-doubao.mjs 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. #!/usr/bin/env node
  2. /**
  3. * tts-doubao.mjs · 豆包语音 TTS(火山引擎 openspeech)
  4. *
  5. * 用法:
  6. * node scripts/tts-doubao.mjs --text "你好" --out demo.mp3
  7. * node scripts/tts-doubao.mjs --text-file script.txt --out out.mp3 --speed 1.0
  8. *
  9. * 输出:
  10. * - mp3 文件写到 --out 路径
  11. * - stdout 打印一行 JSON: {"path":"...","bytes":54321,"duration":12.34,"text_chars":2}
  12. *
  13. * 依赖:Node 18+(自带 fetch/crypto)、ffprobe(测时长,brew install ffmpeg)
  14. *
  15. * env(自动从 skill 根目录 .env 读取,也可走 process.env 覆盖):
  16. * DOUBAO_TTS_API_KEY 可选(新版 API Key 鉴权)
  17. * DOUBAO_APP_ID 可选(控制台 App ID,与 DOUBAO_ACCESS_KEY 配套)
  18. * DOUBAO_ACCESS_KEY 可选(控制台 Access Token,与 DOUBAO_APP_ID 配套)
  19. * DOUBAO_TTS_VOICE_ID 必填(音色 id)
  20. * DOUBAO_TTS_RESOURCE_ID 可选(默认按音色自动推断)
  21. * DOUBAO_TTS_ENDPOINT 默认 https://openspeech.bytedance.com/api/v3/tts/unidirectional
  22. */
  23. import fs from 'node:fs';
  24. import path from 'node:path';
  25. import { execFileSync } from 'node:child_process';
  26. import { fileURLToPath } from 'node:url';
  27. import { randomUUID } from 'node:crypto';
  // Absolute filesystem path of this script, derived from the module URL
  // (ES modules do not provide a built-in __dirname).
  const scriptPath = fileURLToPath(import.meta.url);
  // Directory that contains this script.
  const __dirname = path.dirname(scriptPath);
  // Parent of the script directory; loadEnv() looks for `.env` here.
  const SKILL_ROOT = path.resolve(__dirname, '..');
  /**
   * Load KEY=VALUE pairs from `<SKILL_ROOT>/.env` into `process.env`.
   *
   * Blank lines, lines starting with `#`, and lines without `=` are skipped.
   * One pair of matching surrounding quotes (single or double) is stripped
   * from the value. Variables that already exist in `process.env` are left
   * untouched. Does nothing when the file is absent.
   */
  function loadEnv() {
    const dotenvFile = path.join(SKILL_ROOT, '.env');
    if (!fs.existsSync(dotenvFile)) return;

    const isWrappedIn = (value, quote) => value.startsWith(quote) && value.endsWith(quote);

    fs.readFileSync(dotenvFile, 'utf8')
      .split('\n')
      .map((rawLine) => rawLine.trim())
      .filter((entry) => entry !== '' && !entry.startsWith('#'))
      .forEach((entry) => {
        const eq = entry.indexOf('=');
        if (eq < 0) return;

        const name = entry.slice(0, eq).trim();
        const rawValue = entry.slice(eq + 1).trim();
        const value = isWrappedIn(rawValue, '"') || isWrappedIn(rawValue, "'")
          ? rawValue.slice(1, -1)
          : rawValue;

        // The existing environment takes precedence over the file.
        if (!(name in process.env)) process.env[name] = value;
      });
  }
  // Run at module load so that later reads of process.env also see values from the .env file.
  loadEnv();
  /**
   * Parse the command-line flags understood by this script.
   *
   * Scanning starts at index 2 of `argv`. Each value-taking flag stores the
   * following token verbatim (which is `undefined` when the flag is the last
   * token). Unrecognised tokens are ignored.
   *
   * @param {string[]} argv - Full argument vector, e.g. `process.argv`.
   * @returns {object} Parsed options; `speed` defaults to `'1.0'` and
   *   `encoding` to `'mp3'`. `help` is `true` when `--help` or `-h` was seen.
   */
  function parseArgs(argv) {
    // Flags that consume the next token, mapped to the option key they fill.
    const valueFlags = new Map([
      ['--text', 'text'],
      ['--text-file', 'textFile'],
      ['--out', 'out'],
      ['--speed', 'speed'],
      ['--voice', 'voice'],
      ['--encoding', 'encoding'],
    ]);

    const parsed = { speed: '1.0', encoding: 'mp3' };
    let pos = 2;
    while (pos < argv.length) {
      const token = argv[pos];
      if (valueFlags.has(token)) {
        pos += 1;
        parsed[valueFlags.get(token)] = argv[pos];
      } else if (token === '--help' || token === '-h') {
        parsed.help = true;
      }
      pos += 1;
    }
    return parsed;
  }
  /**
   * Print the command-line help text to stderr and terminate the process
   * with exit code 1.
   */
  function usage() {
    const helpLines = [
      'tts-doubao.mjs · 豆包语音 TTS',
      '--text <str> 要合成的文本',
      '--text-file <path> 从文件读取文本(与 --text 二选一)',
      '--out <path> 输出 mp3 路径(必填)',
      '--speed <float> 语速倍率,默认 1.0(0.5-2.0)',
      '--voice <voice_id> 覆盖 .env 里的音色 id',
      '--encoding <ext> mp3 / wav / pcm,默认 mp3',
    ];
    console.error(helpLines.join('\n'));
    process.exit(1);
  }
  /**
   * Measure the duration of a media file by running `ffprobe`.
   *
   * This is best effort: a missing `ffprobe` binary, a non-zero exit status,
   * or unparsable output all yield `null` instead of an exception.
   *
   * @param {string} filePath - Path of the file to probe.
   * @returns {number|null} The duration value printed by ffprobe
   *   (`format=duration`), or `null` when it could not be determined.
   */
  function getDuration(filePath) {
    let stdout;
    try {
      stdout = execFileSync('ffprobe', [
        '-v', 'error',
        '-show_entries', 'format=duration',
        // Print only the bare value, without the section wrapper or key name.
        '-of', 'default=noprint_wrappers=1:nokey=1',
        filePath,
      ], { encoding: 'utf8' });
    } catch (e) {
      // Deliberate best effort: the duration is optional metadata for the caller.
      return null;
    }

    const seconds = Number.parseFloat(stdout.trim());
    // ffprobe may print a non-numeric value (e.g. `N/A`) or nothing at all;
    // report that as "unknown" (null) like every other failure, not as NaN.
    return Number.isFinite(seconds) ? seconds : null;
  }
  /**
   * Choose the resource id to send for a voice when none is configured.
   *
   * Rules are checked in order; the first match wins:
   *   - id starts with `S_`       -> `seed-icl-1.0`
   *   - id contains `uranus`      -> `seed-tts-2.0`
   *   - anything else             -> `seed-tts-1.0`
   *
   * @param {string} voiceId - Voice identifier.
   * @returns {string} The inferred resource id.
   */
  function inferResourceId(voiceId) {
    const rules = [
      [(id) => id.startsWith('S_'), 'seed-icl-1.0'],
      [(id) => id.includes('uranus'), 'seed-tts-2.0'],
    ];
    const matched = rules.find(([applies]) => applies(voiceId));
    return matched ? matched[1] : 'seed-tts-1.0';
  }
  /**
   * Convert a speed multiplier into the integer sent as `speech_rate`.
   *
   * The result is `(speed - 1) * 100`, rounded to the nearest integer and
   * clamped to the range [-50, 100]. Input that does not parse to a finite
   * number yields 0.
   *
   * @param {string|number} speed - Speed multiplier, e.g. `'1.0'`.
   * @returns {number} Integer in [-50, 100].
   */
  function speedToSpeechRate(speed) {
    const multiplier = Number.parseFloat(speed);
    if (Number.isNaN(multiplier) || Math.abs(multiplier) === Infinity) {
      return 0;
    }

    const percent = Math.round((multiplier - 1) * 100);
    if (percent > 100) return 100;
    if (percent < -50) return -50;
    return percent;
  }
  /**
   * Build the HTTP headers for a TTS request, including credentials read
   * from the environment.
   *
   * When `DOUBAO_TTS_API_KEY` is set it is sent as `X-Api-Key` and nothing
   * else is required. Otherwise both `DOUBAO_APP_ID` and `DOUBAO_ACCESS_KEY`
   * must be set; they are sent as `X-Api-App-Id` / `X-Api-Access-Key`.
   *
   * @param {object} params
   * @param {string} params.requestId - Value for `X-Api-Request-Id`.
   * @param {string} params.resourceId - Value for `X-Api-Resource-Id`.
   * @returns {object} Header name/value map.
   * @throws {Error} When no usable credential set is configured.
   */
  function buildAuthHeaders({ requestId, resourceId }) {
    const {
      DOUBAO_TTS_API_KEY: apiKey,
      DOUBAO_APP_ID: appId,
      DOUBAO_ACCESS_KEY: accessKey,
    } = process.env;

    const common = {
      'Content-Type': 'application/json',
      'X-Api-Resource-Id': resourceId,
      'X-Api-Request-Id': requestId,
    };

    // The API key wins whenever it is present; the other credentials are ignored.
    if (apiKey) {
      return { ...common, 'X-Api-Key': apiKey };
    }

    if (!appId) throw new Error('缺 DOUBAO_TTS_API_KEY 或 DOUBAO_APP_ID(检查 .env)');
    if (!accessKey) throw new Error('缺 DOUBAO_ACCESS_KEY(检查 .env)');

    return {
      ...common,
      'X-Api-App-Id': appId,
      'X-Api-Access-Key': accessKey,
    };
  }
  /**
   * Extract the audio bytes from a TTS response whose body is a sequence of
   * JSON objects, one per line.
   *
   * Each non-blank line is parsed as JSON. A missing `code` counts as 0.
   * A record with code 20000000 is treated as the end marker and stops
   * processing; any other non-zero code is an error. Records with a `data`
   * field contribute base64-decoded audio, concatenated in order.
   *
   * @param {{ text: () => Promise<string> }} res - Response-like object.
   * @returns {Promise<Buffer>} The concatenated audio bytes.
   * @throws {Error} When a line is not JSON, a record carries an error code,
   *   or no audio data was received.
   */
  async function readV3Audio(res) {
    const raw = await res.text();
    const audioParts = [];
    let endCode = null;
    let endMessage = '';

    const records = raw
      .split(/\r?\n/)
      .map((entry) => entry.trim())
      .filter((entry) => entry !== '');

    for (const record of records) {
      let payload;
      try {
        payload = JSON.parse(record);
      } catch (e) {
        throw new Error(`API 响应行不是 JSON:${record.slice(0, 200)}`);
      }

      const code = payload.code ?? 0;
      if (code === 20000000) {
        // End marker: remember it for diagnostics and ignore anything after it.
        endCode = code;
        endMessage = payload.message || '';
        break;
      }
      if (code !== 0) {
        throw new Error(`API 返回错误 code=${code} msg=${payload.message || JSON.stringify(payload)}`);
      }
      if (payload.data) {
        audioParts.push(Buffer.from(payload.data, 'base64'));
      }
    }

    if (audioParts.length === 0) {
      const detail = endCode ? `结束码 ${endCode} ${endMessage}` : raw.slice(0, 500);
      throw new Error(`API 响应无音频数据:${detail}`);
    }
    return Buffer.concat(audioParts);
  }
  /**
   * Synthesize speech for `text` by POSTing to the TTS endpoint and return
   * the resulting audio bytes.
   *
   * Configuration comes from the environment:
   *   - endpoint: `DOUBAO_TTS_ENDPOINT`, else the built-in default URL
   *   - voice: the `voice` argument, else `DOUBAO_TTS_VOICE_ID`, else `DOUBAO_SPEAKER`
   *   - resource id: `DOUBAO_TTS_RESOURCE_ID`, else inferred from the voice id
   *
   * @param {object} params
   * @param {string} params.text - Text to synthesize.
   * @param {string} [params.voice] - Voice id overriding the environment.
   * @param {string|number} params.speed - Speed multiplier (converted via speedToSpeechRate).
   * @param {string} params.encoding - Audio format sent as `audio_params.format`.
   * @returns {Promise<Buffer>} Audio bytes as returned by readV3Audio.
   * @throws {Error} When no voice id is available, the HTTP status is not OK,
   *   or the response body cannot be decoded.
   */
  async function tts({ text, voice, speed, encoding }) {
    const defaultEndpoint = 'https://openspeech.bytedance.com/api/v3/tts/unidirectional';
    const { env } = process;

    const voiceId = voice || env.DOUBAO_TTS_VOICE_ID || env.DOUBAO_SPEAKER;
    if (!voiceId) throw new Error('缺 DOUBAO_TTS_VOICE_ID(检查 .env 或用 --voice 传)');

    const endpoint = env.DOUBAO_TTS_ENDPOINT || defaultEndpoint;
    const resourceId = env.DOUBAO_TTS_RESOURCE_ID || inferResourceId(voiceId);
    // A fresh id per call, sent in the request-id header.
    const requestId = randomUUID();

    const audioParams = {
      format: encoding,
      sample_rate: 24000,
      speech_rate: speedToSpeechRate(speed),
    };
    const payload = {
      user: { uid: 'huashu-design' },
      req_params: {
        text,
        speaker: voiceId,
        audio_params: audioParams,
      },
    };

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: buildAuthHeaders({ requestId, resourceId }),
      body: JSON.stringify(payload),
    });

    if (response.ok) {
      return readV3Audio(response);
    }
    const errText = await response.text();
    throw new Error(`HTTP ${response.status}: ${errText.slice(0, 500)}`);
  }
  /**
   * CLI entry point: parse arguments, synthesize the audio, write it to the
   * `--out` path and print a one-line JSON summary to stdout.
   *
   * The text comes from `--text`, or from the trimmed contents of
   * `--text-file` when `--text` is empty or absent. Missing text or a missing
   * `--out` prints an error followed by the usage text (usage() exits the
   * process). Parent directories of the output path are created as needed.
   *
   * The summary has the keys `path`, `bytes`, `duration` and `text_chars`;
   * `text_chars` is `text.length`.
   *
   * @returns {Promise<void>}
   */
  async function main() {
    const opts = parseArgs(process.argv);
    if (opts.help) usage();

    // Inline text wins; the file is only read when no inline text was given.
    const text = opts.text || !opts.textFile
      ? opts.text
      : fs.readFileSync(opts.textFile, 'utf8').trim();

    if (!text) {
      console.error('错:缺 --text 或 --text-file');
      usage();
    }
    if (!opts.out) {
      console.error('错:缺 --out');
      usage();
    }

    const target = path.resolve(opts.out);
    fs.mkdirSync(path.dirname(target), { recursive: true });

    const { voice, speed, encoding } = opts;
    const audio = await tts({ text, voice, speed, encoding });
    fs.writeFileSync(target, audio);

    // The duration is probed from the file that was just written.
    console.log(JSON.stringify({
      path: target,
      bytes: audio.length,
      duration: getDuration(target),
      text_chars: text.length,
    }));
  }
  // Run the CLI; any failure is reported on stderr and becomes exit code 1.
  main().then(undefined, (failure) => {
    console.error(`TTS 失败:${failure.message}`);
    process.exit(1);
  });