| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133 |
- import { describe, it, expect, beforeAll } from 'vitest';
- import { spawn } from 'child_process';
- import * as fs from 'fs';
- import * as path from 'path';
- import {
- parseWatchdogTimeoutMs,
- deriveCheckIntervalMs,
- installMainThreadWatchdog,
- DEFAULT_WATCHDOG_TIMEOUT_MS,
- } from '../src/mcp/liveness-watchdog';
- describe('config parsing', () => {
- it('parseWatchdogTimeoutMs falls back for missing/invalid input', () => {
- expect(parseWatchdogTimeoutMs(undefined)).toBe(DEFAULT_WATCHDOG_TIMEOUT_MS);
- expect(parseWatchdogTimeoutMs('not-a-number')).toBe(DEFAULT_WATCHDOG_TIMEOUT_MS);
- expect(parseWatchdogTimeoutMs('0')).toBe(DEFAULT_WATCHDOG_TIMEOUT_MS);
- expect(parseWatchdogTimeoutMs('-5')).toBe(DEFAULT_WATCHDOG_TIMEOUT_MS);
- expect(parseWatchdogTimeoutMs('1500')).toBe(1500);
- });
- it('deriveCheckIntervalMs stays within [50, 2000] and scales with the timeout', () => {
- expect(deriveCheckIntervalMs(60_000)).toBe(2000); // clamped high
- expect(deriveCheckIntervalMs(500)).toBe(100); // 500/5
- expect(deriveCheckIntervalMs(10)).toBe(50); // clamped low
- });
- });
- describe('installMainThreadWatchdog opt-out', () => {
- it('returns null (spawns nothing) when CODEGRAPH_NO_WATCHDOG is set', () => {
- const prev = process.env.CODEGRAPH_NO_WATCHDOG;
- process.env.CODEGRAPH_NO_WATCHDOG = '1';
- try {
- expect(installMainThreadWatchdog()).toBeNull();
- } finally {
- if (prev === undefined) delete process.env.CODEGRAPH_NO_WATCHDOG;
- else process.env.CODEGRAPH_NO_WATCHDOG = prev;
- }
- });
- });
- /**
- * End-to-end: spawn a real process, install the real watchdog (which spawns a
- * separate watchdog child), and prove it kills a wedged main thread — including
- * the case a worker thread could NOT (a non-allocating loop under heap pressure,
- * which strands a same-process worker on V8's global safepoint, #850). Drives
- * the built module the way mcp-ppid-watchdog.test.ts drives the built CLI.
- */
- describe('liveness watchdog (spawned, real watchdog process)', () => {
- const MODULE = path.resolve(__dirname, '../dist/mcp/liveness-watchdog.js');
- beforeAll(() => {
- if (!fs.existsSync(MODULE)) {
- throw new Error(`Build the project first: ${MODULE} is missing (run npm run build).`);
- }
- });
- function runChild(
- env: Record<string, string>,
- body: string,
- hardTimeoutMs: number
- ): Promise<{ code: number | null; signal: NodeJS.Signals | 'TIMEOUT' | null }> {
- const src = `
- const { installMainThreadWatchdog } = require(${JSON.stringify(MODULE)});
- installMainThreadWatchdog();
- ${body}
- `;
- const child = spawn(process.execPath, ['-e', src], {
- env: { ...process.env, ...env },
- stdio: ['ignore', 'ignore', 'ignore'],
- });
- return new Promise((resolve) => {
- const timer = setTimeout(() => {
- child.kill('SIGKILL');
- resolve({ code: null, signal: 'TIMEOUT' });
- }, hardTimeoutMs);
- child.on('exit', (code, signal) => {
- clearTimeout(timer);
- resolve({ code, signal });
- });
- });
- }
- // Assert the watchdog terminated the process. POSIX surfaces the external
- // SIGKILL as signal 'SIGKILL'; Windows has no real signals, so the watchdog's
- // `process.kill(pid, 'SIGKILL')` maps to TerminateProcess and an observer sees
- // signal=null with a non-zero exit code. Either is a kill; the synthetic
- // 'TIMEOUT' (the watchdog never fired) is the failure we're guarding against.
- function expectKilled(r: { code: number | null; signal: NodeJS.Signals | 'TIMEOUT' | null }): void {
- expect(r.signal === 'SIGKILL' || (r.signal === null && r.code !== 0 && r.code !== null)).toBe(true);
- }
- it('SIGKILLs a process whose main thread wedges in a sync loop', async () => {
- const r = await runChild(
- { CODEGRAPH_WATCHDOG_TIMEOUT_MS: '500' },
- 'setTimeout(() => { while (true) {} }, 150);',
- 8000
- );
- expectKilled(r);
- }, 12000);
- it('SIGKILLs a non-allocating wedge under heap pressure (the case worker threads stalled on)', async () => {
- const r = await runChild(
- { CODEGRAPH_WATCHDOG_TIMEOUT_MS: '500' },
- // ~40MB retained so a GC is likely, then a tight NON-allocating loop — the
- // exact shape that deadlocks a same-process worker on the global safepoint.
- 'const k=[]; for (let i=0;i<40;i++) k.push(Buffer.alloc(1024*1024,i)); global.__k=k; setTimeout(() => { while (true) {} }, 150);',
- 8000
- );
- expectKilled(r);
- }, 12000);
- it('does NOT kill a healthy process that keeps its event loop turning', async () => {
- const { code, signal } = await runChild(
- { CODEGRAPH_WATCHDOG_TIMEOUT_MS: '500' },
- 'const iv = setInterval(() => {}, 50); setTimeout(() => { clearInterval(iv); process.exit(7); }, 1500);',
- 8000
- );
- expect(signal).toBeNull(); // never signalled
- expect(code).toBe(7); // exited on its own terms
- }, 12000);
- it('does NOT kill a wedged process when CODEGRAPH_NO_WATCHDOG=1', async () => {
- const { code, signal } = await runChild(
- { CODEGRAPH_WATCHDOG_TIMEOUT_MS: '500', CODEGRAPH_NO_WATCHDOG: '1' },
- 'setTimeout(() => { const end = Date.now() + 1500; while (Date.now() < end) {} process.exit(3); }, 150);',
- 8000
- );
- // It exits with its OWN code 3 — proving nothing killed it. (Checking only
- // signal=null is insufficient on Windows, where a kill also reports null.)
- expect(signal).toBeNull();
- expect(code).toBe(3);
- }, 12000);
- });
|