From 8cfadfea284432a2beb90127e1a2adad0ed89e6f Mon Sep 17 00:00:00 2001 From: SeungHyun Date: Fri, 15 May 2026 14:39:15 +0900 Subject: [PATCH] fix(hooks): close grouped command bypasses in gateguard (#1912) Inspect executable bodies inside plain subshells and brace groups before applying destructive command classifiers.\n\nCo-authored-by: Jamkris <82251632+Jamkris@users.noreply.github.com> --- scripts/hooks/gateguard-fact-force.js | 154 +++++--------- scripts/lib/shell-substitution.js | 250 ++++++++++++++++++++++- tests/hooks/gateguard-fact-force.test.js | 109 ++++++++++ 3 files changed, 412 insertions(+), 101 deletions(-) diff --git a/scripts/hooks/gateguard-fact-force.js b/scripts/hooks/gateguard-fact-force.js index 3568dc3c..e49d6c14 100644 --- a/scripts/hooks/gateguard-fact-force.js +++ b/scripts/hooks/gateguard-fact-force.js @@ -25,6 +25,11 @@ const crypto = require('crypto'); const fs = require('fs'); const path = require('path'); +const { + extractCommandSubstitutions, + extractSubshellGroups, + extractBraceGroups +} = require('../lib/shell-substitution'); // Session state — scoped per session to avoid cross-session races. const STATE_DIR = process.env.GATEGUARD_STATE_DIR || path.join(process.env.HOME || process.env.USERPROFILE || '/tmp', '.gateguard'); @@ -84,105 +89,6 @@ function explodeSubshells(input) { return out; } -/** - * Extract executable command-substitution bodies from a shell line. Single - * quotes are literal, so substitutions inside them are ignored; double quotes - * still permit substitutions, so those bodies are scanned before quoted text - * is stripped. - * - * @param {string} input - * @returns {string[]} - */ -function extractCommandSubstitutions(input) { - const source = String(input || ''); - const substitutions = []; - let inSingle = false; - let inDouble = false; - - for (let i = 0; i < source.length; i++) { - const ch = source[i]; - const prev = source[i - 1]; - - if (ch === '\\' && !inSingle) { - i += 1; - continue; - } - - if (ch === "'" && !inDouble && prev !== '\\') { - inSingle = !inSingle; - continue; - } - - if (ch === '"' && !inSingle && prev !== '\\') { - inDouble = !inDouble; - continue; - } - - if (inSingle) { - continue; - } - - if (ch === '`') { - let body = ''; - i += 1; - while (i < source.length) { - const inner = source[i]; - if (inner === '\\') { - body += inner; - if (i + 1 < source.length) { - body += source[i + 1]; - i += 2; - continue; - } - } - if (inner === '`') { - break; - } - body += inner; - i += 1; - } - if (body.trim()) { - substitutions.push(body); - substitutions.push(...extractCommandSubstitutions(body)); - } - continue; - } - - if (ch === '$' && source[i + 1] === '(') { - let depth = 1; - let body = ''; - i += 2; - while (i < source.length && depth > 0) { - const inner = source[i]; - if (inner === '\\') { - body += inner; - if (i + 1 < source.length) { - body += source[i + 1]; - i += 2; - continue; - } - } - if (inner === '(') { - depth += 1; - } else if (inner === ')') { - depth -= 1; - if (depth === 0) { - break; - } - } - body += inner; - i += 1; - } - if (body.trim()) { - substitutions.push(body); - substitutions.push(...extractCommandSubstitutions(body)); - } - } - } - - return substitutions; -} - /** * Split a command line into top-level segments at unquoted shell * separators (`;`, `|`, `&`, `&&`, `||`) and across subshells @@ -392,6 +298,54 @@ function isDestructiveGit(tokens) { * @param {string} command * @returns {boolean} */ +/** + * Walk every executable body reachable from a raw command line and + * return them as a flat list. Bodies that bash will execute live in + * three different syntactic constructs, each handled by a sibling + * extractor in `scripts/lib/shell-substitution.js`: + * - `$(...)` and backticks via `extractCommandSubstitutions` + * - plain `(...)` subshells via `extractSubshellGroups` + * - `{ ...; }` brace groups via `extractBraceGroups` + * + * Each extractor recurses into its own syntax. The BFS here adds + * cross-syntax discovery — e.g. a `(...)` inside a `$(...)` body, or + * a `{ ...; }` inside a `(...)` body — by feeding every harvested + * body back through all three extractors. A `seen` set bounds the + * cost to O(unique bodies). + * + * @param {string} raw + * @returns {string[]} + */ +function collectExecutableBodies(raw) { + const bodies = [raw]; + const queue = [raw]; + const seen = new Set(); + + while (queue.length) { + const current = queue.shift(); + if (seen.has(current)) continue; + seen.add(current); + + for (const body of extractCommandSubstitutions(current)) { + if (seen.has(body)) continue; + bodies.push(body); + queue.push(body); + } + for (const body of extractSubshellGroups(current)) { + if (seen.has(body)) continue; + bodies.push(body); + queue.push(body); + } + for (const body of extractBraceGroups(current)) { + if (seen.has(body)) continue; + bodies.push(body); + queue.push(body); + } + } + + return bodies; +} + function isDestructiveBash(command) { // The SQL/dd phrases live in command bodies, not as flag-bearing // arguments, so we still match them by regex — but on the input @@ -401,7 +355,7 @@ function isDestructiveBash(command) { const flattened = explodeSubshells(stripQuotedStrings(raw)); if (DESTRUCTIVE_SQL_DD.test(flattened)) return true; - const segments = [raw, ...extractCommandSubstitutions(raw)].flatMap(splitCommandSegments); + const segments = collectExecutableBodies(raw).flatMap(splitCommandSegments); for (const segment of segments) { if (DESTRUCTIVE_SQL_DD.test(stripQuotedStrings(segment))) return true; const tokens = tokenize(segment); diff --git a/scripts/lib/shell-substitution.js b/scripts/lib/shell-substitution.js index dd5d6c74..2689ccb5 100644 --- a/scripts/lib/shell-substitution.js +++ b/scripts/lib/shell-substitution.js @@ -243,4 +243,252 @@ function extractSubshellGroups(input) { return groups; } -module.exports = { extractCommandSubstitutions, extractSubshellGroups }; +/** + * Extract bodies of `{ ...; }` brace groups. + * + * Bash brace groups run their body in the *current* shell (unlike `(...)`, + * which forks a subshell). Both forms group multiple commands, so for the + * purposes of destructive-bash and dev-server detection they are equivalent: + * a `rm -rf` or `npm run dev` inside `{ ...; }` still executes. + * + * Recognition rules match bash's own reserved-word semantics: + * - `{` is a reserved word only when followed by whitespace and preceded by + * the line start, whitespace, or a shell operator (`;`, `|`, `&`, `(`). + * So `{npm run dev}` is NOT a brace group (single token starting with `{`). + * - `}` closes the group only when preceded by `;` or whitespace. + * So `foo}` inside the body is not a closing brace. + * - Single quotes are literal; double quotes are also literal for `{`/`}`. + * - `$(...)`, backticks, and plain `(...)` spans are skipped so we don't + * double-extract bodies the sibling extractors already cover. + * + * @param {string} input + * @returns {string[]} + */ +function extractBraceGroups(input) { + const source = String(input || ''); + const groups = []; + let inSingle = false; + let inDouble = false; + + for (let i = 0; i < source.length; i++) { + const ch = source[i]; + const prev = source[i - 1]; + + if (ch === '\\' && !inSingle) { + i += 1; + continue; + } + + if (ch === "'" && !inDouble && prev !== '\\') { + inSingle = !inSingle; + continue; + } + + if (ch === '"' && !inSingle && prev !== '\\') { + inDouble = !inDouble; + continue; + } + + if (inSingle || inDouble) { + continue; + } + + if (ch === '$' && source[i + 1] === '(') { + let depth = 1; + let skipInSingle = false; + let skipInDouble = false; + i += 2; + while (i < source.length && depth > 0) { + const inner = source[i]; + const innerPrev = source[i - 1]; + if (inner === '\\' && !skipInSingle) { + i += 2; + continue; + } + if (inner === "'" && !skipInDouble && innerPrev !== '\\') { + skipInSingle = !skipInSingle; + } else if (inner === '"' && !skipInSingle && innerPrev !== '\\') { + skipInDouble = !skipInDouble; + } else if (!skipInSingle && !skipInDouble) { + if (inner === '(') depth += 1; + else if (inner === ')') depth -= 1; + } + i += 1; + } + i -= 1; + continue; + } + + if (ch === '`') { + i += 1; + while (i < source.length && source[i] !== '`') { + if (source[i] === '\\' && i + 1 < source.length) { + i += 2; + continue; + } + i += 1; + } + continue; + } + + if (ch === '(') { + let depth = 1; + let skipInSingle = false; + let skipInDouble = false; + i += 1; + while (i < source.length && depth > 0) { + const inner = source[i]; + const innerPrev = source[i - 1]; + if (inner === '\\' && !skipInSingle) { + i += 2; + continue; + } + if (inner === "'" && !skipInDouble && innerPrev !== '\\') { + skipInSingle = !skipInSingle; + } else if (inner === '"' && !skipInSingle && innerPrev !== '\\') { + skipInDouble = !skipInDouble; + } else if (!skipInSingle && !skipInDouble) { + if (inner === '(') depth += 1; + else if (inner === ')') depth -= 1; + } + i += 1; + } + i -= 1; + continue; + } + + if (ch === '{' && /\s/.test(source[i + 1] || '')) { + const prevIsBoundary = i === 0 || /[\s;|&(]/.test(prev); + if (!prevIsBoundary) continue; + + let depth = 1; + let body = ''; + let bodyInSingle = false; + let bodyInDouble = false; + i += 1; + while (i < source.length && depth > 0) { + const inner = source[i]; + const innerPrev = source[i - 1]; + if (inner === '\\' && !bodyInSingle) { + body += inner; + if (i + 1 < source.length) { + body += source[i + 1]; + i += 2; + continue; + } + } + if (inner === "'" && !bodyInDouble && innerPrev !== '\\') { + bodyInSingle = !bodyInSingle; + body += inner; + i += 1; + continue; + } + if (inner === '"' && !bodyInSingle && innerPrev !== '\\') { + bodyInDouble = !bodyInDouble; + body += inner; + i += 1; + continue; + } + if (bodyInSingle || bodyInDouble) { + body += inner; + i += 1; + continue; + } + // Skip $(...) spans — a quoted `}` or `}`-as-text inside a + // substitution body must not close the enclosing brace group. + if (inner === '$' && source[i + 1] === '(') { + body += inner + source[i + 1]; + let subDepth = 1; + let subInSingle = false; + let subInDouble = false; + i += 2; + while (i < source.length && subDepth > 0) { + const c = source[i]; + const p = source[i - 1]; + body += c; + if (c === '\\' && !subInSingle && i + 1 < source.length) { + body += source[i + 1]; + i += 2; + continue; + } + if (c === "'" && !subInDouble && p !== '\\') subInSingle = !subInSingle; + else if (c === '"' && !subInSingle && p !== '\\') subInDouble = !subInDouble; + else if (!subInSingle && !subInDouble) { + if (c === '(') subDepth += 1; + else if (c === ')') subDepth -= 1; + } + i += 1; + } + continue; + } + // Skip backtick spans for the same reason. + if (inner === '`') { + body += inner; + i += 1; + while (i < source.length && source[i] !== '`') { + if (source[i] === '\\' && i + 1 < source.length) { + body += source[i] + source[i + 1]; + i += 2; + continue; + } + body += source[i]; + i += 1; + } + if (i < source.length) { + body += source[i]; + i += 1; + } + continue; + } + // Skip plain (...) subshell spans for the same reason. + if (inner === '(') { + body += inner; + let subDepth = 1; + let subInSingle = false; + let subInDouble = false; + i += 1; + while (i < source.length && subDepth > 0) { + const c = source[i]; + const p = source[i - 1]; + body += c; + if (c === '\\' && !subInSingle && i + 1 < source.length) { + body += source[i + 1]; + i += 2; + continue; + } + if (c === "'" && !subInDouble && p !== '\\') subInSingle = !subInSingle; + else if (c === '"' && !subInSingle && p !== '\\') subInDouble = !subInDouble; + else if (!subInSingle && !subInDouble) { + if (c === '(') subDepth += 1; + else if (c === ')') subDepth -= 1; + } + i += 1; + } + continue; + } + if (inner === '{' && /\s/.test(source[i + 1] || '')) { + // Match the outer-scan boundary rule for nested `{` so + // tokens like `foo{` (no boundary, but followed by space + // via `foo{ bar`) cannot bump nested depth. + const nestedPrevIsBoundary = /[\s;|&(]/.test(innerPrev); + if (nestedPrevIsBoundary) depth += 1; + } else if (inner === '}' && (innerPrev === ';' || /\s/.test(innerPrev))) { + depth -= 1; + if (depth === 0) { + break; + } + } + body += inner; + i += 1; + } + if (body.trim()) { + groups.push(body); + groups.push(...extractBraceGroups(body)); + } + } + } + + return groups; +} + +module.exports = { extractCommandSubstitutions, extractSubshellGroups, extractBraceGroups }; diff --git a/tests/hooks/gateguard-fact-force.test.js b/tests/hooks/gateguard-fact-force.test.js index 00ad9486..74886d80 100644 --- a/tests/hooks/gateguard-fact-force.test.js +++ b/tests/hooks/gateguard-fact-force.test.js @@ -1282,6 +1282,115 @@ function runTests() { 'double-quoted dollar-paren subshell'); })) passed++; else failed++; + // --- Subshell + brace-group bypass coverage --- + // Destructive commands inside `(...)` and `{ ...; }` execute the + // same way they do at the top level, so the destructive classifier + // must see inside those bodies too. Nested parens `((...))` are + // arithmetic-evaluation syntax in bash (not a nested subshell), but + // our parser depth-tracks them conservatively — i.e. the inner + // tokens are still scanned for destructive intent. That's safety + // over precision and the right default for this gate. + + if (test('denies rm -rf inside plain (...) subshell group', () => { + expectDestructiveDeny('(rm -rf /tmp/junk)', 'plain subshell group'); + })) passed++; else failed++; + + if (test('denies rm -rf inside ((...)) — arithmetic eval, treated conservatively', () => { + expectDestructiveDeny('((rm -rf /tmp/junk))', 'arithmetic-eval parens'); + })) passed++; else failed++; + + if (test('denies rm -rf inside { ...; } brace group', () => { + expectDestructiveDeny('{ rm -rf /tmp/junk; }', 'brace group'); + })) passed++; else failed++; + + if (test('denies git push --force inside plain (...) subshell group', () => { + expectDestructiveDeny('(git push --force origin main)', + 'git-force in subshell'); + })) passed++; else failed++; + + if (test('denies git push --force inside { ...; } brace group', () => { + expectDestructiveDeny('{ git push --force origin main; }', + 'git-force in brace group'); + })) passed++; else failed++; + + if (test('denies rm -rf nested across () and {} (cross-syntax)', () => { + expectDestructiveDeny('(echo y; { rm -rf /tmp/junk; })', + '() containing {} cross-syntax'); + })) passed++; else failed++; + + if (test('denies rm -rf nested across $() and () (cross-syntax)', () => { + expectDestructiveDeny('$(echo y; (rm -rf /tmp/junk))', + '$() containing () cross-syntax'); + })) passed++; else failed++; + + // Negative cases — literals and non-destructive commands must NOT + // be promoted to destructive by the new grouping-body walker. + + if (test('allows literal (rm -rf ...) inside single quotes', () => { + expectAllow("git commit -m '(rm -rf /tmp/junk)'", + 'single-quoted subshell literal'); + })) passed++; else failed++; + + if (test('allows literal (rm -rf ...) inside double quotes', () => { + expectAllow('echo "(rm -rf /tmp/junk)"', + 'double-quoted subshell literal'); + })) passed++; else failed++; + + if (test('allows literal { rm -rf ...; } inside double quotes', () => { + expectAllow('echo "{ rm -rf /tmp/junk; }"', + 'double-quoted brace-group literal'); + })) passed++; else failed++; + + if (test('allows non-destructive (echo hello)', () => { + expectAllow('(echo hello)', 'non-destructive subshell'); + })) passed++; else failed++; + + if (test('allows non-destructive { echo hello; }', () => { + expectAllow('{ echo hello; }', 'non-destructive brace group'); + })) passed++; else failed++; + + if (test('allows {rm -rf} — no space after { is not a brace group', () => { + // bash treats `{rm` as a single token; no destructive intent + // can be statically derived from this form, and the command + // would not actually run rm at runtime either. + expectAllow('echo {rm -rf /tmp/junk}', + 'no-space brace literal'); + })) passed++; else failed++; + + // --- Round 1 review fixes: brace-group span-skip + boundary --- + // Verifies the body-accumulation loop in `extractBraceGroups` + // correctly walks past `$(...)`, `(...)`, and backtick spans so + // a `}` inside one of those does not terminate the brace group + // early, plus the nested `{` boundary rule. + + if (test('denies rm -rf in brace group with backtick containing }', () => { + expectDestructiveDeny('{ echo `echo }`; rm -rf /tmp/junk; }', + 'brace + backtick containing }'); + })) passed++; else failed++; + + if (test('denies rm -rf in brace group with $() containing }', () => { + expectDestructiveDeny('{ echo $(echo "}"); rm -rf /tmp/junk; }', + 'brace + $() containing }'); + })) passed++; else failed++; + + if (test('denies rm -rf in brace group with nested () containing }', () => { + expectDestructiveDeny('{ (echo "}"); rm -rf /tmp/junk; }', + 'brace + () containing }'); + })) passed++; else failed++; + + if (test('denies rm -rf in brace group with $() body containing }', () => { + expectDestructiveDeny('{ x=$(echo a}b); rm -rf /tmp/junk; }', + 'brace + $() body with }'); + })) passed++; else failed++; + + if (test('denies rm -rf when token like foo{ appears before brace group close', () => { + // tokens like `foo{` are not reserved-word `{` (no boundary, + // no whitespace after) — must not bump nested-depth and so + // must not delay brace-group close + expectDestructiveDeny('{ echo foo{bar; rm -rf /tmp/junk; }', + 'foo{ token inside brace body'); + })) passed++; else failed++; + // Cleanup only the temp directory created by this test file. try { if (fs.existsSync(stateDir)) {