parse-worker.ts 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. /**
  2. * Parse Worker
  3. *
  4. * Runs tree-sitter parsing in a separate thread so the main thread
  5. * stays unblocked and the UI animation renders smoothly.
  6. */
  7. import { parentPort } from 'worker_threads';
  8. import { extractFromSource } from './tree-sitter';
  9. import { detectLanguage, loadGrammarsForLanguages, resetParser } from './grammars';
  10. import type { Language, ExtractionResult } from '../types';
  11. // Emscripten prints `Aborted()` (and a follow-up RuntimeError diag
  12. // line) directly to stderr when WASM aborts — before the JS catch
  13. // runs. Worker stderr is inherited by the parent, so each crash leaks
  14. // a noise line to the user's terminal even though the JS layer
  15. // already handles the failure cleanly. Filter these specific lines
  16. // out at the source. Real diagnostic output (anything we log
  17. // ourselves) goes through console.* / parentPort and is unaffected.
  18. //
  19. // Caveats deliberately accepted:
  20. // - Per-call match: each `write()` call is matched in isolation.
  21. // If Emscripten ever splits `Aborted(` across two write()s (it
  22. // doesn't today — synchronous abort prints the whole line at
  23. // once via libc puts) the first fragment would leak. Buffering
  24. // across calls would add complexity for a hypothetical case.
  25. // - Substring exactness: the prefix `Aborted(` is the literal
  26. // Emscripten signature. Any user code that legitimately writes
  27. // a stderr line starting with that prefix would also be filtered;
  28. // in practice no real diagnostic does.
  {
    // Keep a bound handle to the genuine writer so everything that is not
    // Emscripten abort noise still reaches stderr unchanged.
    const forwardToStderr = process.stderr.write.bind(process.stderr);

    /** True when `text` is one of the two Emscripten abort lines we hide. */
    const isAbortNoise = (text: string): boolean =>
      text.startsWith('Aborted(') || text.includes('Build with -sASSERTIONS for more info');

    /**
     * Replacement for `process.stderr.write`: swallows Emscripten abort
     * noise and forwards every other chunk to the real writer.
     *
     * Accepts both call shapes of the original API, `(chunk, cb)` and
     * `(chunk, encoding, cb)`. Returns `true` for a suppressed chunk,
     * otherwise whatever the real writer returns.
     */
    process.stderr.write = ((
      chunk: string | Uint8Array,
      encoding?: BufferEncoding | ((err?: Error | null) => void),
      cb?: (err?: Error | null) => void
    ): boolean => {
      // Each call is inspected in isolation; binary chunks are decoded as UTF-8.
      const text = typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf-8');

      if (!isAbortNoise(text)) {
        return forwardToStderr(chunk as never, encoding as never, cb as never);
      }

      // The write is dropped, but its completion callback must still fire so
      // that a caller waiting on it does not stall. In the two-argument form
      // the callback arrives in the `encoding` slot.
      const onDone = typeof encoding === 'function' ? encoding : cb;
      if (onDone) onDone();
      return true;
    }) as typeof process.stderr.write;
  }
  /** A language's parser is reset after every this many successful parses. */
  const PARSER_RESET_INTERVAL = 5000;

  /** Successful parse count per language; drives the periodic parser reset. */
  const parseCounts = new Map<Language, number>();

  /** Requests the main thread sends to this worker, discriminated by `type`. */
  type WorkerRequest =
    | { type: 'load-grammars'; languages: Language[] }
    | {
        type: 'parse';
        id?: number;
        filePath: string;
        content: string;
        frameworkNames?: string[];
        language?: Language;
      }
    | { type: 'shutdown' };

  // `parentPort` is null when this module is not running inside a worker
  // thread. Check once here so the handler below can use the port without
  // repeated non-null assertions, and so misuse fails with a clear message.
  const workerPort = parentPort;
  if (workerPort === null) {
    throw new Error('parse-worker must be run as a worker thread (parentPort is null)');
  }

  /**
   * Message loop for the parse worker.
   *
   * - `load-grammars`: loads the requested grammars, then replies
   *   `{ type: 'grammars-loaded' }`.
   * - `parse`: extracts from one file and replies
   *   `{ type: 'parse-result', id, result }`. Failures are reported as a
   *   result with empty nodes/edges and a single `parse_error` entry, except
   *   WASM memory errors, which terminate the worker.
   * - `shutdown`: replies `{ type: 'shutdown-ack' }`.
   *
   * Messages with any other `type` are ignored.
   */
  workerPort.on('message', async (msg: WorkerRequest) => {
    if (msg.type === 'load-grammars') {
      // NOTE(review): a rejection here is not caught in this handler, so no
      // 'grammars-loaded' reply is sent — confirm the main thread handles that.
      await loadGrammarsForLanguages(msg.languages);
      workerPort.postMessage({ type: 'grammars-loaded' });
    } else if (msg.type === 'parse') {
      const { id, filePath, content, frameworkNames } = msg;
      try {
        // The main thread resolves the language (it holds the project's
        // codegraph.json extension overrides) and sends it; fall back to
        // detection for older callers / safety.
        const language = msg.language ?? detectLanguage(filePath, content);
        const result: ExtractionResult = extractFromSource(filePath, content, language, frameworkNames);

        // Periodic parser reset to reclaim WASM heap memory. Only parses that
        // returned without throwing are counted.
        const count = (parseCounts.get(language) ?? 0) + 1;
        parseCounts.set(language, count);
        if (count % PARSER_RESET_INTERVAL === 0) {
          resetParser(language);
        }

        workerPort.postMessage({ type: 'parse-result', id, result });
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);

        // WASM memory errors leave the module in a corrupted state — all
        // subsequent parses would also fail (cascading failures). Crash the
        // worker so the main thread spawns a fresh one with a clean heap.
        if (message.includes('memory access out of bounds') || message.includes('out of memory')) {
          process.exit(1);
        }

        // Any other failure is reported as an empty extraction carrying one
        // error entry, so the request identified by `id` still gets a reply.
        workerPort.postMessage({
          type: 'parse-result',
          id,
          result: {
            nodes: [],
            edges: [],
            unresolvedReferences: [],
            errors: [{ message: `Parse worker error: ${message}`, filePath, severity: 'error', code: 'parse_error' }],
            durationMs: 0,
          } satisfies ExtractionResult,
        });
      }
    } else if (msg.type === 'shutdown') {
      workerPort.postMessage({ type: 'shutdown-ack' });
    }
  });