Files
everything-claude-code/scripts/lib/shell-substitution.js
SeungHyun 8cfadfea28 fix(hooks): close grouped command bypasses in gateguard (#1912)
Inspect executable bodies inside plain subshells and brace groups before applying destructive command classifiers.\n\nCo-authored-by: Jamkris <82251632+Jamkris@users.noreply.github.com>
2026-05-15 01:39:15 -04:00

495 lines
14 KiB
JavaScript

'use strict';
/**
* Extract executable command-substitution bodies from a shell line.
*
* Single quotes are literal, so substitutions inside them are ignored;
* double quotes still permit substitutions, so those bodies are scanned
* before quoted text is stripped. Returns each substitution body plus
* any nested substitutions discovered recursively.
*
* Originally introduced in scripts/hooks/gateguard-fact-force.js
* (PR #1853 round 2). Extracted to a shared lib so other PreToolUse
* hooks that need the same "scan inside `$(...)` and backticks"
* behavior can reuse it without duplicating the parser.
*
* @param {string} input
* @returns {string[]}
*/
function extractCommandSubstitutions(input) {
const source = String(input || '');
const substitutions = [];
let inSingle = false;
let inDouble = false;
for (let i = 0; i < source.length; i++) {
const ch = source[i];
const prev = source[i - 1];
if (ch === '\\' && !inSingle) {
i += 1;
continue;
}
if (ch === "'" && !inDouble && prev !== '\\') {
inSingle = !inSingle;
continue;
}
if (ch === '"' && !inSingle && prev !== '\\') {
inDouble = !inDouble;
continue;
}
if (inSingle) {
continue;
}
if (ch === '`') {
let body = '';
i += 1;
while (i < source.length) {
const inner = source[i];
if (inner === '\\') {
body += inner;
if (i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
}
if (inner === '`') {
break;
}
body += inner;
i += 1;
}
if (body.trim()) {
substitutions.push(body);
substitutions.push(...extractCommandSubstitutions(body));
}
continue;
}
if (ch === '$' && source[i + 1] === '(') {
let depth = 1;
let body = '';
let bodyInSingle = false;
let bodyInDouble = false;
i += 2;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !bodyInSingle) {
body += inner;
if (i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
}
if (inner === "'" && !bodyInDouble && innerPrev !== '\\') {
bodyInSingle = !bodyInSingle;
} else if (inner === '"' && !bodyInSingle && innerPrev !== '\\') {
bodyInDouble = !bodyInDouble;
} else if (!bodyInSingle && !bodyInDouble) {
if (inner === '(') {
depth += 1;
} else if (inner === ')') {
depth -= 1;
if (depth === 0) {
break;
}
}
}
body += inner;
i += 1;
}
if (body.trim()) {
substitutions.push(body);
substitutions.push(...extractCommandSubstitutions(body));
}
}
}
return substitutions;
}
/**
* Extract bodies of plain `(...)` subshell groups.
*
* Bash treats `(npm run dev)` as a subshell that executes its contents, but
* the regex-light segment splitters used by our PreToolUse hooks don't peer
* inside those parens. This helper finds top-level `(...)` groups (skipping
* `$(...)` command substitutions and backticks, which `extractCommandSubstitutions`
* already covers) and returns each body, recursing for nested groups.
*
* Quote semantics:
* - Single quotes are literal: `'( ... )'` is a string, not a subshell.
* - Double quotes are literal *for parens*: `"( ... )"` is a string too —
* bash only honors `$( )` inside double quotes, not bare `( )`.
*
* @param {string} input
* @returns {string[]}
*/
function extractSubshellGroups(input) {
const source = String(input || '');
const groups = [];
let inSingle = false;
let inDouble = false;
for (let i = 0; i < source.length; i++) {
const ch = source[i];
const prev = source[i - 1];
if (ch === '\\' && !inSingle) {
i += 1;
continue;
}
if (ch === "'" && !inDouble && prev !== '\\') {
inSingle = !inSingle;
continue;
}
if (ch === '"' && !inSingle && prev !== '\\') {
inDouble = !inDouble;
continue;
}
if (inSingle || inDouble) {
continue;
}
if (ch === '$' && source[i + 1] === '(') {
let depth = 1;
let skipInSingle = false;
let skipInDouble = false;
i += 2;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !skipInSingle) {
i += 2;
continue;
}
if (inner === "'" && !skipInDouble && innerPrev !== '\\') {
skipInSingle = !skipInSingle;
} else if (inner === '"' && !skipInSingle && innerPrev !== '\\') {
skipInDouble = !skipInDouble;
} else if (!skipInSingle && !skipInDouble) {
if (inner === '(') depth += 1;
else if (inner === ')') depth -= 1;
}
i += 1;
}
i -= 1;
continue;
}
if (ch === '`') {
i += 1;
while (i < source.length && source[i] !== '`') {
if (source[i] === '\\' && i + 1 < source.length) {
i += 2;
continue;
}
i += 1;
}
continue;
}
if (ch === '(') {
let depth = 1;
let body = '';
let bodyInSingle = false;
let bodyInDouble = false;
i += 1;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !bodyInSingle) {
body += inner;
if (i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
}
if (inner === "'" && !bodyInDouble && innerPrev !== '\\') {
bodyInSingle = !bodyInSingle;
} else if (inner === '"' && !bodyInSingle && innerPrev !== '\\') {
bodyInDouble = !bodyInDouble;
} else if (!bodyInSingle && !bodyInDouble) {
if (inner === '(') {
depth += 1;
} else if (inner === ')') {
depth -= 1;
if (depth === 0) {
break;
}
}
}
body += inner;
i += 1;
}
if (body.trim()) {
groups.push(body);
groups.push(...extractSubshellGroups(body));
}
}
}
return groups;
}
/**
* Extract bodies of `{ ...; }` brace groups.
*
* Bash brace groups run their body in the *current* shell (unlike `(...)`,
* which forks a subshell). Both forms group multiple commands, so for the
* purposes of destructive-bash and dev-server detection they are equivalent:
* a `rm -rf` or `npm run dev` inside `{ ...; }` still executes.
*
* Recognition rules match bash's own reserved-word semantics:
* - `{` is a reserved word only when followed by whitespace and preceded by
* the line start, whitespace, or a shell operator (`;`, `|`, `&`, `(`).
* So `{npm run dev}` is NOT a brace group (single token starting with `{`).
* - `}` closes the group only when preceded by `;` or whitespace.
* So `foo}` inside the body is not a closing brace.
* - Single quotes are literal; double quotes are also literal for `{`/`}`.
* - `$(...)`, backticks, and plain `(...)` spans are skipped so we don't
* double-extract bodies the sibling extractors already cover.
*
* @param {string} input
* @returns {string[]}
*/
function extractBraceGroups(input) {
const source = String(input || '');
const groups = [];
let inSingle = false;
let inDouble = false;
for (let i = 0; i < source.length; i++) {
const ch = source[i];
const prev = source[i - 1];
if (ch === '\\' && !inSingle) {
i += 1;
continue;
}
if (ch === "'" && !inDouble && prev !== '\\') {
inSingle = !inSingle;
continue;
}
if (ch === '"' && !inSingle && prev !== '\\') {
inDouble = !inDouble;
continue;
}
if (inSingle || inDouble) {
continue;
}
if (ch === '$' && source[i + 1] === '(') {
let depth = 1;
let skipInSingle = false;
let skipInDouble = false;
i += 2;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !skipInSingle) {
i += 2;
continue;
}
if (inner === "'" && !skipInDouble && innerPrev !== '\\') {
skipInSingle = !skipInSingle;
} else if (inner === '"' && !skipInSingle && innerPrev !== '\\') {
skipInDouble = !skipInDouble;
} else if (!skipInSingle && !skipInDouble) {
if (inner === '(') depth += 1;
else if (inner === ')') depth -= 1;
}
i += 1;
}
i -= 1;
continue;
}
if (ch === '`') {
i += 1;
while (i < source.length && source[i] !== '`') {
if (source[i] === '\\' && i + 1 < source.length) {
i += 2;
continue;
}
i += 1;
}
continue;
}
if (ch === '(') {
let depth = 1;
let skipInSingle = false;
let skipInDouble = false;
i += 1;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !skipInSingle) {
i += 2;
continue;
}
if (inner === "'" && !skipInDouble && innerPrev !== '\\') {
skipInSingle = !skipInSingle;
} else if (inner === '"' && !skipInSingle && innerPrev !== '\\') {
skipInDouble = !skipInDouble;
} else if (!skipInSingle && !skipInDouble) {
if (inner === '(') depth += 1;
else if (inner === ')') depth -= 1;
}
i += 1;
}
i -= 1;
continue;
}
if (ch === '{' && /\s/.test(source[i + 1] || '')) {
const prevIsBoundary = i === 0 || /[\s;|&(]/.test(prev);
if (!prevIsBoundary) continue;
let depth = 1;
let body = '';
let bodyInSingle = false;
let bodyInDouble = false;
i += 1;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !bodyInSingle) {
body += inner;
if (i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
}
if (inner === "'" && !bodyInDouble && innerPrev !== '\\') {
bodyInSingle = !bodyInSingle;
body += inner;
i += 1;
continue;
}
if (inner === '"' && !bodyInSingle && innerPrev !== '\\') {
bodyInDouble = !bodyInDouble;
body += inner;
i += 1;
continue;
}
if (bodyInSingle || bodyInDouble) {
body += inner;
i += 1;
continue;
}
// Skip $(...) spans — a quoted `}` or `}`-as-text inside a
// substitution body must not close the enclosing brace group.
if (inner === '$' && source[i + 1] === '(') {
body += inner + source[i + 1];
let subDepth = 1;
let subInSingle = false;
let subInDouble = false;
i += 2;
while (i < source.length && subDepth > 0) {
const c = source[i];
const p = source[i - 1];
body += c;
if (c === '\\' && !subInSingle && i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
if (c === "'" && !subInDouble && p !== '\\') subInSingle = !subInSingle;
else if (c === '"' && !subInSingle && p !== '\\') subInDouble = !subInDouble;
else if (!subInSingle && !subInDouble) {
if (c === '(') subDepth += 1;
else if (c === ')') subDepth -= 1;
}
i += 1;
}
continue;
}
// Skip backtick spans for the same reason.
if (inner === '`') {
body += inner;
i += 1;
while (i < source.length && source[i] !== '`') {
if (source[i] === '\\' && i + 1 < source.length) {
body += source[i] + source[i + 1];
i += 2;
continue;
}
body += source[i];
i += 1;
}
if (i < source.length) {
body += source[i];
i += 1;
}
continue;
}
// Skip plain (...) subshell spans for the same reason.
if (inner === '(') {
body += inner;
let subDepth = 1;
let subInSingle = false;
let subInDouble = false;
i += 1;
while (i < source.length && subDepth > 0) {
const c = source[i];
const p = source[i - 1];
body += c;
if (c === '\\' && !subInSingle && i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
if (c === "'" && !subInDouble && p !== '\\') subInSingle = !subInSingle;
else if (c === '"' && !subInSingle && p !== '\\') subInDouble = !subInDouble;
else if (!subInSingle && !subInDouble) {
if (c === '(') subDepth += 1;
else if (c === ')') subDepth -= 1;
}
i += 1;
}
continue;
}
if (inner === '{' && /\s/.test(source[i + 1] || '')) {
// Match the outer-scan boundary rule for nested `{` so
// tokens like `foo{` (no boundary, but followed by space
// via `foo{ bar`) cannot bump nested depth.
const nestedPrevIsBoundary = /[\s;|&(]/.test(innerPrev);
if (nestedPrevIsBoundary) depth += 1;
} else if (inner === '}' && (innerPrev === ';' || /\s/.test(innerPrev))) {
depth -= 1;
if (depth === 0) {
break;
}
}
body += inner;
i += 1;
}
if (body.trim()) {
groups.push(body);
groups.push(...extractBraceGroups(body));
}
}
}
return groups;
}
module.exports = { extractCommandSubstitutions, extractSubshellGroups, extractBraceGroups };