Files
everything-claude-code/scripts/lib/session-bridge.js
Jamkris 28548f67ba fix(lib): use unique tmp suffix in writeBridgeAtomic to eliminate ENOENT race
`writeBridgeAtomic` wrote to a fixed `${target}.tmp` path before
calling `renameSync`. When two processes write to the same session
bridge concurrently (e.g. PostToolUse `ecc-metrics-bridge` + the
background `ecc-statusline`, both calling `writeBridgeAtomic(sessionId, ...)`),
the canonical atomic-rename race fires:

  1. Process A: writeFileSync(target.tmp, JSON_A) — tmp file exists.
  2. Process B: writeFileSync(target.tmp, JSON_B) — tmp file overwritten.
  3. Process A: renameSync(target.tmp, target) — succeeds; target = JSON_B
     (A's payload silently corrupted en-route).
  4. Process B: renameSync(target.tmp, target) — throws ENOENT (the
     rename consumed the file).

Every caller in the repo wraps `writeBridgeAtomic` in `try {} catch {}`,
so the ENOENT exception is swallowed and the user-visible symptom is
just "the bridge file occasionally contains the wrong process's
payload" with no diagnostic.

Reproduced before this commit:

  $ # two concurrent writers, each calling writeBridgeAtomic 500 times
  $ # against the same session ID
  [A] errors=244   # 244 ENOENT exceptions swallowed
  [B] errors=248   # ditto

After this commit the same workload reports 0 errors in both
subprocesses: tmp paths no longer collide.

Fix: change `${target}.tmp` to
`${target}.${process.pid}.${crypto.randomBytes(4).toString('hex')}.tmp`,
matching the pattern already used by `writeCostWarningIfChanged` in
`scripts/hooks/ecc-metrics-bridge.js` (commit 9b1d8918). The pid +
4-byte nonce gives each writer process a distinct tmp path, so step 2
above no longer overwrites step 1's payload and step 4 no longer
races step 3.

Also added: on `renameSync` failure, attempt `fs.unlinkSync(tmp)` so
a writer that fails (disk full, permission, parent dir gone) does
not leak its tmp file. The cleanup is best-effort and the original
error is still re-thrown.

**Scope clarification.** This commit closes the atomic-rename
primitive's race only. The *read-modify-write* race in callers —
two writers each read the same bridge state, increment, and write
back, the second clobbering the first — is a separate concern that
needs locking or per-writer logs, and is intentionally out of scope
for this PR. The cost-tracker / metrics-bridge callers tolerate
last-writer-wins on their cumulative aggregates today and this
commit does not change that contract.

The companion `writeWarnState` in `ecc-context-monitor.js` has the
same fixed-suffix pattern and the same race; that fix lands in the
next commit so each can be reviewed against its own diff.
2026-05-19 04:57:10 -04:00

103 lines
3.0 KiB
JavaScript

'use strict';
/**
* Shared session bridge utilities for ECC hooks.
*
* The bridge file is a small JSON aggregate in /tmp that allows
* statusline, metrics-bridge, and context-monitor to share state
* without scanning large JSONL logs on every invocation.
*/
const crypto = require('crypto');
const fs = require('fs');
const os = require('os');
const path = require('path');
const MAX_SESSION_ID_LENGTH = 64;
/**
* Sanitize a session ID for safe use in file paths.
* Rejects path traversal, strips unsafe chars, limits length.
* @param {string} raw
* @returns {string|null} Safe session ID or null if invalid
*/
function sanitizeSessionId(raw) {
if (!raw || typeof raw !== 'string') return null;
if (/[/\\]|\.\./.test(raw)) return null;
const safe = raw.replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, MAX_SESSION_ID_LENGTH);
return safe || null;
}
/**
* Get the bridge file path for a session.
* @param {string} sessionId - Already-sanitized session ID
* @returns {string}
*/
function getBridgePath(sessionId) {
return path.join(os.tmpdir(), `ecc-metrics-${sessionId}.json`);
}
/**
* Read bridge data. Returns null on any error.
* @param {string} sessionId - Already-sanitized session ID
* @returns {object|null}
*/
function readBridge(sessionId) {
try {
const raw = fs.readFileSync(getBridgePath(sessionId), 'utf8');
return JSON.parse(raw);
} catch {
return null;
}
}
/**
* Write bridge data atomically (write unique-suffix tmp then rename).
*
* The tmp path includes `process.pid` plus a random nonce so concurrent
* writers (e.g. PostToolUse `ecc-metrics-bridge` and the background
* `ecc-statusline`, both writing to the same session bridge) do not
* clobber each other's tmp file mid-write. With a fixed `.tmp` suffix
* two writers could both call `writeFileSync` against the same path
* before either reaches `renameSync`, causing one writer's payload to
* silently overwrite the other and the second `renameSync` to throw
* ENOENT once the rename consumes the file.
*
* Same pattern already used by `writeCostWarningIfChanged` in
* `scripts/hooks/ecc-metrics-bridge.js` (commit 9b1d8918) for the
* cost-warning cache; this commit applies it to the session-bridge
* primitive too.
*
* @param {string} sessionId - Already-sanitized session ID
* @param {object} data
*/
function writeBridgeAtomic(sessionId, data) {
const target = getBridgePath(sessionId);
const tmp = `${target}.${process.pid}.${crypto.randomBytes(4).toString('hex')}.tmp`;
fs.writeFileSync(tmp, JSON.stringify(data), 'utf8');
try {
fs.renameSync(tmp, target);
} catch (err) {
try { fs.unlinkSync(tmp); } catch { /* ignore */ }
throw err;
}
}
/**
* Resolve session ID from environment variables.
* @returns {string|null} Sanitized session ID or null
*/
function resolveSessionId() {
const raw = process.env.ECC_SESSION_ID || process.env.CLAUDE_SESSION_ID || '';
return sanitizeSessionId(raw);
}
module.exports = {
sanitizeSessionId,
getBridgePath,
readBridge,
writeBridgeAtomic,
resolveSessionId,
MAX_SESSION_ID_LENGTH
};