فهرست منبع

fix(mcp): exit on uncaught exception instead of orphaning/spinning at 100% CPU (#855)

The process-wide uncaughtException handler logged the error and kept running. For
the detached `serve --mcp` daemon that turned any escaped fault into an
unrecoverable orphan: nothing respawns it, and when logging the raw Error hit a
V8 source-position loop while lazily formatting `.stack`, the main thread wedged
at 100% CPU so even the PPID watchdog / idle-timer could no longer fire. Same
failure mode as #799, which only fixed the stdin-'error' trigger.

Restore Node's default fatal semantics: render a bounded, hang-proof line (name +
message only — never read `.stack`) then exit non-zero, so a fresh daemon starts
on the next connection. Extracted to src/bin/fatal-handler.ts with injectable
seams; unit-tested incl. the never-touch-stack invariant.

Closes #850.
Colby Mchenry 1 هفته پیش
والد
کامیت
3476ac9a27
4فایلهای تغییر یافته به همراه205 افزوده شده و 8 حذف شده
  1. 4 0
      CHANGELOG.md
  2. 100 0
      __tests__/fatal-handler.test.ts
  3. 8 8
      src/bin/codegraph.ts
  4. 93 0
      src/bin/fatal-handler.ts

+ 4 - 0
CHANGELOG.md

@@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+### Fixes
+
+- The CodeGraph MCP server no longer risks getting stuck at 100% CPU after an unexpected internal error. Previously such an error was logged but the process was left running in a broken state, where it could spin a CPU core indefinitely and had to be killed by hand. The server now logs the error and exits cleanly, so a fresh one starts on the next request. Thanks @songhlc. (#850)
+
 
 ## [1.0.0] - 2026-06-12
 

+ 100 - 0
__tests__/fatal-handler.test.ts

@@ -0,0 +1,100 @@
+import { describe, it, expect } from 'vitest';
+import { EventEmitter } from 'events';
+import { describeFatal, installFatalHandlers } from '../src/bin/fatal-handler';
+
+/**
+ * Regression coverage for #850 (and the related #799): a fault that reaches the
+ * process-wide handler must NOT be swallowed-and-kept-running, and rendering it
+ * must NEVER touch `error.stack` — the lazy stack getter is what can wedge a
+ * core in a V8 source-position loop.
+ */
+describe('describeFatal', () => {
+  it('renders name + message for an Error', () => {
+    expect(describeFatal(new TypeError('boom'))).toBe('TypeError: boom');
+  });
+
+  it('falls back to the name when the message is empty', () => {
+    expect(describeFatal(new Error(''))).toBe('Error');
+  });
+
+  it('stringifies non-Error values', () => {
+    expect(describeFatal('a string reason')).toBe('a string reason');
+    expect(describeFatal(42)).toBe('42');
+    expect(describeFatal(null)).toBe('null');
+    expect(describeFatal(undefined)).toBe('undefined');
+  });
+
+  it('NEVER reads error.stack (the #850 hang lives in the lazy stack getter)', () => {
+    const err = new Error('boom');
+    let stackAccessed = false;
+    Object.defineProperty(err, 'stack', {
+      configurable: true,
+      get() {
+        // Simulates the pathological case: formatting the stack never returns.
+        stackAccessed = true;
+        throw new Error('stack formatting wedged');
+      },
+    });
+
+    const rendered = describeFatal(err);
+
+    expect(stackAccessed).toBe(false);
+    expect(rendered).toBe('Error: boom');
+    expect(rendered).not.toMatch(/\bat\b/); // no stack frames leaked in
+  });
+
+  it('never throws on a value with a hostile toString', () => {
+    const hostile = {
+      toString() {
+        throw new Error('no stringification for you');
+      },
+    };
+    expect(describeFatal(hostile)).toBe('<unstringifiable value>');
+  });
+});
+
+describe('installFatalHandlers', () => {
+  function harness() {
+    const target = new EventEmitter();
+    const writes: string[] = [];
+    const exits: number[] = [];
+    installFatalHandlers({
+      target,
+      write: (line) => writes.push(line),
+      exit: (code) => {
+        exits.push(code);
+      },
+    });
+    return { target, writes, exits };
+  }
+
+  it('logs a bounded line and exits non-zero on an uncaught exception', () => {
+    const { target, writes, exits } = harness();
+    target.emit('uncaughtException', new RangeError('kaboom'));
+    expect(writes).toEqual(['[CodeGraph] Uncaught exception: RangeError: kaboom\n']);
+    expect(exits).toEqual([1]);
+  });
+
+  it('logs a bounded line and exits non-zero on an unhandled rejection', () => {
+    const { target, writes, exits } = harness();
+    target.emit('unhandledRejection', 'promise went sideways');
+    expect(writes).toEqual(['[CodeGraph] Unhandled rejection: promise went sideways\n']);
+    expect(exits).toEqual([1]);
+  });
+
+  it('still exits — without touching the stack — when stack formatting would wedge', () => {
+    const { target, writes, exits } = harness();
+    const err = new Error('wedged');
+    Object.defineProperty(err, 'stack', {
+      configurable: true,
+      get() {
+        throw new Error('stack formatting wedged');
+      },
+    });
+
+    // Must not throw or hang: the handler renders message-only and exits.
+    expect(() => target.emit('uncaughtException', err)).not.toThrow();
+    expect(writes).toEqual(['[CodeGraph] Uncaught exception: Error: wedged\n']);
+    expect(exits).toEqual([1]);
+  });
+});

+ 8 - 8
src/bin/codegraph.ts

@@ -32,6 +32,7 @@ import { createShimmerProgress } from '../ui/shimmer-progress';
 import { getGlyphs } from '../ui/glyphs';
 
 import { buildNode25BlockBanner, buildNodeTooOldBanner, MIN_NODE_MAJOR } from './node-version-check';
+import { installFatalHandlers } from './fatal-handler';
 import { relaunchWithWasmRuntimeFlagsIfNeeded } from '../extraction/wasm-runtime-flags';
 import { EXTRACTION_VERSION } from '../extraction/extraction-version';
 import { getTelemetry, TELEMETRY_DOCS, recordIndexEvent } from '../telemetry';
@@ -90,6 +91,13 @@ if (nodeMajor < MIN_NODE_MAJOR) {
 // inherits this process's flags) is compiled. See ../extraction/wasm-runtime-flags.
 relaunchWithWasmRuntimeFlagsIfNeeded(__filename);
 
+// Last-resort fatal handlers: log a bounded line and exit non-zero. A fault
+// that reaches here escaped every boundary, so the process is in an undefined
+// state — keeping it alive is what let the detached MCP daemon orphan and pin a
+// CPU core with no recovery (#799, #850). Installed before the command branch
+// so it also covers a synchronous throw during startup. See ./fatal-handler.
+installFatalHandlers();
+
 // Check if running with no arguments - run installer
 if (process.argv.length === 2) {
   import('../installer').then(({ runInstaller }) =>
@@ -103,14 +111,6 @@ if (process.argv.length === 2) {
   main();
 }
 
-process.on('uncaughtException', (error) => {
-  console.error('[CodeGraph] Uncaught exception:', error);
-});
-
-process.on('unhandledRejection', (reason) => {
-  console.error('[CodeGraph] Unhandled rejection:', reason);
-});
-
 function main() {
 
 const program = new Command();

+ 93 - 0
src/bin/fatal-handler.ts

@@ -0,0 +1,93 @@
+/**
+ * Last-resort handlers for uncaught exceptions and unhandled rejections.
+ *
+ * Reaching one of these means a fault escaped every boundary (per-request
+ * try/catch in the MCP transport, the file watcher's own `'error'` handlers,
+ * telemetry's fail-silent contract) — i.e. the process is in an undefined
+ * state. Node's default in that case is to print and exit non-zero. The CLI
+ * previously OVERRODE that to "log the error and keep running", which is the
+ * bug behind two production incidents:
+ *
+ *   - #799 — a stdin socket `'error'` escalated here; the server logged it and
+ *     kept running, orphaning the detached MCP daemon and (on Linux) spinning a
+ *     POLLHUP fd at 100% CPU. Fixed for that one trigger by treating stdin
+ *     failure as shutdown (`src/mcp/stdin-teardown.ts`).
+ *   - #850 — a *different* uncaught exception hit the same handler. Logging it
+ *     forced V8 to lazily format the Error's `.stack`, which entered a
+ *     non-terminating source-position walk and pinned a core. Because the
+ *     handler kept the process alive, the detached daemon was left wedged: its
+ *     PPID watchdog and idle-timer (both `setInterval`s) could no longer fire,
+ *     and nothing respawned it — unrecoverable without a manual `kill`.
+ *
+ * The fix restores the safe default: log a BOUNDED, hang-proof line, then exit
+ * non-zero so a fresh daemon starts on the next connection.
+ *
+ * Two properties are load-bearing and covered by tests:
+ *   1. {@link describeFatal} never reads `error.stack` and never hands the raw
+ *      Error to `console.*`. The lazy stack getter is exactly the step that can
+ *      wedge (#850); since it would run *inside* this handler, touching it could
+ *      block the very `exit()` below. Name + message are plain string
+ *      properties and are always safe.
+ *   2. We write synchronously to fd 2 and then exit, so the message is flushed
+ *      even though `process.exit()` doesn't drain async streams.
+ */
+import * as fs from 'fs';
+
+/**
+ * Render an uncaught value for the last-resort log WITHOUT triggering stack
+ * formatting. Pure and total — never throws, never touches `.stack`.
+ */
+export function describeFatal(value: unknown): string {
+  if (value instanceof Error) {
+    const name = typeof value.name === 'string' && value.name ? value.name : 'Error';
+    // `message` is a plain own/proto string property — reading it does NOT
+    // format the stack (which is what can loop forever, #850).
+    const message = typeof value.message === 'string' ? value.message : '';
+    return message ? `${name}: ${message}` : name;
+  }
+  try {
+    return String(value);
+  } catch {
+    // e.g. an object with a throwing `toString` / `Symbol.toPrimitive`.
+    return '<unstringifiable value>';
+  }
+}
+
+/** Best-effort synchronous stderr write that can never keep a doomed process alive. */
+function writeStderr(line: string): void {
+  try {
+    fs.writeSync(2, line);
+  } catch {
+    /* stderr closed/gone — nothing more we can safely do */
+  }
+}
+
+/** Injectable seams so the wiring is testable without registering real handlers. */
+export interface FatalHandlerDeps {
+  /** Event target to attach to. Defaults to `process`. */
+  target?: NodeJS.EventEmitter;
+  /** How to terminate. Defaults to `process.exit`. */
+  exit?: (code: number) => void;
+  /** How to emit the bounded line. Defaults to a synchronous fd-2 write. */
+  write?: (line: string) => void;
+}
+
+/**
+ * Install the uncaught-exception / unhandled-rejection handlers. Both log a
+ * bounded line and then exit non-zero (Node's default fatal semantics).
+ */
+export function installFatalHandlers(deps: FatalHandlerDeps = {}): void {
+  const target = deps.target ?? process;
+  const exit = deps.exit ?? ((code: number) => process.exit(code));
+  const write = deps.write ?? writeStderr;
+
+  target.on('uncaughtException', (error: unknown) => {
+    write(`[CodeGraph] Uncaught exception: ${describeFatal(error)}\n`);
+    exit(1);
+  });
+
+  target.on('unhandledRejection', (reason: unknown) => {
+    write(`[CodeGraph] Unhandled rejection: ${describeFatal(reason)}\n`);
+    exit(1);
+  });
+}