fix(hooks): close grouped command bypasses in gateguard (#1912)

Inspect executable bodies inside plain subshells and brace groups before applying destructive command classifiers.\n\nCo-authored-by: Jamkris <82251632+Jamkris@users.noreply.github.com>
This commit is contained in:
SeungHyun
2026-05-15 14:39:15 +09:00
committed by GitHub
parent e2992860ae
commit 8cfadfea28
3 changed files with 412 additions and 101 deletions

View File

@@ -25,6 +25,11 @@
const crypto = require('crypto');
const fs = require('fs');
const path = require('path');
const {
extractCommandSubstitutions,
extractSubshellGroups,
extractBraceGroups
} = require('../lib/shell-substitution');
// Session state — scoped per session to avoid cross-session races.
const STATE_DIR = process.env.GATEGUARD_STATE_DIR || path.join(process.env.HOME || process.env.USERPROFILE || '/tmp', '.gateguard');
@@ -84,105 +89,6 @@ function explodeSubshells(input) {
return out;
}
/**
* Extract executable command-substitution bodies from a shell line. Single
* quotes are literal, so substitutions inside them are ignored; double quotes
* still permit substitutions, so those bodies are scanned before quoted text
* is stripped.
*
* @param {string} input
* @returns {string[]}
*/
function extractCommandSubstitutions(input) {
const source = String(input || '');
const substitutions = [];
let inSingle = false;
let inDouble = false;
for (let i = 0; i < source.length; i++) {
const ch = source[i];
const prev = source[i - 1];
if (ch === '\\' && !inSingle) {
i += 1;
continue;
}
if (ch === "'" && !inDouble && prev !== '\\') {
inSingle = !inSingle;
continue;
}
if (ch === '"' && !inSingle && prev !== '\\') {
inDouble = !inDouble;
continue;
}
if (inSingle) {
continue;
}
if (ch === '`') {
let body = '';
i += 1;
while (i < source.length) {
const inner = source[i];
if (inner === '\\') {
body += inner;
if (i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
}
if (inner === '`') {
break;
}
body += inner;
i += 1;
}
if (body.trim()) {
substitutions.push(body);
substitutions.push(...extractCommandSubstitutions(body));
}
continue;
}
if (ch === '$' && source[i + 1] === '(') {
let depth = 1;
let body = '';
i += 2;
while (i < source.length && depth > 0) {
const inner = source[i];
if (inner === '\\') {
body += inner;
if (i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
}
if (inner === '(') {
depth += 1;
} else if (inner === ')') {
depth -= 1;
if (depth === 0) {
break;
}
}
body += inner;
i += 1;
}
if (body.trim()) {
substitutions.push(body);
substitutions.push(...extractCommandSubstitutions(body));
}
}
}
return substitutions;
}
/**
* Split a command line into top-level segments at unquoted shell
* separators (`;`, `|`, `&`, `&&`, `||`) and across subshells
@@ -392,6 +298,54 @@ function isDestructiveGit(tokens) {
* @param {string} command
* @returns {boolean}
*/
/**
* Walk every executable body reachable from a raw command line and
* return them as a flat list. Bodies that bash will execute live in
* three different syntactic constructs, each handled by a sibling
* extractor in `scripts/lib/shell-substitution.js`:
* - `$(...)` and backticks via `extractCommandSubstitutions`
* - plain `(...)` subshells via `extractSubshellGroups`
* - `{ ...; }` brace groups via `extractBraceGroups`
*
* Each extractor recurses into its own syntax. The BFS here adds
* cross-syntax discovery — e.g. a `(...)` inside a `$(...)` body, or
* a `{ ...; }` inside a `(...)` body — by feeding every harvested
* body back through all three extractors. A `seen` set bounds the
* cost to O(unique bodies).
*
* @param {string} raw
* @returns {string[]}
*/
function collectExecutableBodies(raw) {
const bodies = [raw];
const queue = [raw];
const seen = new Set();
while (queue.length) {
const current = queue.shift();
if (seen.has(current)) continue;
seen.add(current);
for (const body of extractCommandSubstitutions(current)) {
if (seen.has(body)) continue;
bodies.push(body);
queue.push(body);
}
for (const body of extractSubshellGroups(current)) {
if (seen.has(body)) continue;
bodies.push(body);
queue.push(body);
}
for (const body of extractBraceGroups(current)) {
if (seen.has(body)) continue;
bodies.push(body);
queue.push(body);
}
}
return bodies;
}
function isDestructiveBash(command) {
// The SQL/dd phrases live in command bodies, not as flag-bearing
// arguments, so we still match them by regex — but on the input
@@ -401,7 +355,7 @@ function isDestructiveBash(command) {
const flattened = explodeSubshells(stripQuotedStrings(raw));
if (DESTRUCTIVE_SQL_DD.test(flattened)) return true;
const segments = [raw, ...extractCommandSubstitutions(raw)].flatMap(splitCommandSegments);
const segments = collectExecutableBodies(raw).flatMap(splitCommandSegments);
for (const segment of segments) {
if (DESTRUCTIVE_SQL_DD.test(stripQuotedStrings(segment))) return true;
const tokens = tokenize(segment);

View File

@@ -243,4 +243,252 @@ function extractSubshellGroups(input) {
return groups;
}
module.exports = { extractCommandSubstitutions, extractSubshellGroups };
/**
* Extract bodies of `{ ...; }` brace groups.
*
* Bash brace groups run their body in the *current* shell (unlike `(...)`,
* which forks a subshell). Both forms group multiple commands, so for the
* purposes of destructive-bash and dev-server detection they are equivalent:
* a `rm -rf` or `npm run dev` inside `{ ...; }` still executes.
*
* Recognition rules match bash's own reserved-word semantics:
* - `{` is a reserved word only when followed by whitespace and preceded by
* the line start, whitespace, or a shell operator (`;`, `|`, `&`, `(`).
* So `{npm run dev}` is NOT a brace group (single token starting with `{`).
* - `}` closes the group only when preceded by `;` or whitespace.
* So `foo}` inside the body is not a closing brace.
* - Single quotes are literal; double quotes are also literal for `{`/`}`.
* - `$(...)`, backticks, and plain `(...)` spans are skipped so we don't
* double-extract bodies the sibling extractors already cover.
*
* @param {string} input
* @returns {string[]}
*/
function extractBraceGroups(input) {
const source = String(input || '');
const groups = [];
let inSingle = false;
let inDouble = false;
for (let i = 0; i < source.length; i++) {
const ch = source[i];
const prev = source[i - 1];
if (ch === '\\' && !inSingle) {
i += 1;
continue;
}
if (ch === "'" && !inDouble && prev !== '\\') {
inSingle = !inSingle;
continue;
}
if (ch === '"' && !inSingle && prev !== '\\') {
inDouble = !inDouble;
continue;
}
if (inSingle || inDouble) {
continue;
}
if (ch === '$' && source[i + 1] === '(') {
let depth = 1;
let skipInSingle = false;
let skipInDouble = false;
i += 2;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !skipInSingle) {
i += 2;
continue;
}
if (inner === "'" && !skipInDouble && innerPrev !== '\\') {
skipInSingle = !skipInSingle;
} else if (inner === '"' && !skipInSingle && innerPrev !== '\\') {
skipInDouble = !skipInDouble;
} else if (!skipInSingle && !skipInDouble) {
if (inner === '(') depth += 1;
else if (inner === ')') depth -= 1;
}
i += 1;
}
i -= 1;
continue;
}
if (ch === '`') {
i += 1;
while (i < source.length && source[i] !== '`') {
if (source[i] === '\\' && i + 1 < source.length) {
i += 2;
continue;
}
i += 1;
}
continue;
}
if (ch === '(') {
let depth = 1;
let skipInSingle = false;
let skipInDouble = false;
i += 1;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !skipInSingle) {
i += 2;
continue;
}
if (inner === "'" && !skipInDouble && innerPrev !== '\\') {
skipInSingle = !skipInSingle;
} else if (inner === '"' && !skipInSingle && innerPrev !== '\\') {
skipInDouble = !skipInDouble;
} else if (!skipInSingle && !skipInDouble) {
if (inner === '(') depth += 1;
else if (inner === ')') depth -= 1;
}
i += 1;
}
i -= 1;
continue;
}
if (ch === '{' && /\s/.test(source[i + 1] || '')) {
const prevIsBoundary = i === 0 || /[\s;|&(]/.test(prev);
if (!prevIsBoundary) continue;
let depth = 1;
let body = '';
let bodyInSingle = false;
let bodyInDouble = false;
i += 1;
while (i < source.length && depth > 0) {
const inner = source[i];
const innerPrev = source[i - 1];
if (inner === '\\' && !bodyInSingle) {
body += inner;
if (i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
}
if (inner === "'" && !bodyInDouble && innerPrev !== '\\') {
bodyInSingle = !bodyInSingle;
body += inner;
i += 1;
continue;
}
if (inner === '"' && !bodyInSingle && innerPrev !== '\\') {
bodyInDouble = !bodyInDouble;
body += inner;
i += 1;
continue;
}
if (bodyInSingle || bodyInDouble) {
body += inner;
i += 1;
continue;
}
// Skip $(...) spans — a quoted `}` or `}`-as-text inside a
// substitution body must not close the enclosing brace group.
if (inner === '$' && source[i + 1] === '(') {
body += inner + source[i + 1];
let subDepth = 1;
let subInSingle = false;
let subInDouble = false;
i += 2;
while (i < source.length && subDepth > 0) {
const c = source[i];
const p = source[i - 1];
body += c;
if (c === '\\' && !subInSingle && i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
if (c === "'" && !subInDouble && p !== '\\') subInSingle = !subInSingle;
else if (c === '"' && !subInSingle && p !== '\\') subInDouble = !subInDouble;
else if (!subInSingle && !subInDouble) {
if (c === '(') subDepth += 1;
else if (c === ')') subDepth -= 1;
}
i += 1;
}
continue;
}
// Skip backtick spans for the same reason.
if (inner === '`') {
body += inner;
i += 1;
while (i < source.length && source[i] !== '`') {
if (source[i] === '\\' && i + 1 < source.length) {
body += source[i] + source[i + 1];
i += 2;
continue;
}
body += source[i];
i += 1;
}
if (i < source.length) {
body += source[i];
i += 1;
}
continue;
}
// Skip plain (...) subshell spans for the same reason.
if (inner === '(') {
body += inner;
let subDepth = 1;
let subInSingle = false;
let subInDouble = false;
i += 1;
while (i < source.length && subDepth > 0) {
const c = source[i];
const p = source[i - 1];
body += c;
if (c === '\\' && !subInSingle && i + 1 < source.length) {
body += source[i + 1];
i += 2;
continue;
}
if (c === "'" && !subInDouble && p !== '\\') subInSingle = !subInSingle;
else if (c === '"' && !subInSingle && p !== '\\') subInDouble = !subInDouble;
else if (!subInSingle && !subInDouble) {
if (c === '(') subDepth += 1;
else if (c === ')') subDepth -= 1;
}
i += 1;
}
continue;
}
if (inner === '{' && /\s/.test(source[i + 1] || '')) {
// Match the outer-scan boundary rule for nested `{` so
// tokens like `foo{` (no boundary, but followed by space
// via `foo{ bar`) cannot bump nested depth.
const nestedPrevIsBoundary = /[\s;|&(]/.test(innerPrev);
if (nestedPrevIsBoundary) depth += 1;
} else if (inner === '}' && (innerPrev === ';' || /\s/.test(innerPrev))) {
depth -= 1;
if (depth === 0) {
break;
}
}
body += inner;
i += 1;
}
if (body.trim()) {
groups.push(body);
groups.push(...extractBraceGroups(body));
}
}
}
return groups;
}
module.exports = { extractCommandSubstitutions, extractSubshellGroups, extractBraceGroups };

View File

@@ -1282,6 +1282,115 @@ function runTests() {
'double-quoted dollar-paren subshell');
})) passed++; else failed++;
// --- Subshell + brace-group bypass coverage ---
// Destructive commands inside `(...)` and `{ ...; }` execute the
// same way they do at the top level, so the destructive classifier
// must see inside those bodies too. Nested parens `((...))` are
// arithmetic-evaluation syntax in bash (not a nested subshell), but
// our parser depth-tracks them conservatively — i.e. the inner
// tokens are still scanned for destructive intent. That's safety
// over precision and the right default for this gate.
if (test('denies rm -rf inside plain (...) subshell group', () => {
expectDestructiveDeny('(rm -rf /tmp/junk)', 'plain subshell group');
})) passed++; else failed++;
if (test('denies rm -rf inside ((...)) — arithmetic eval, treated conservatively', () => {
expectDestructiveDeny('((rm -rf /tmp/junk))', 'arithmetic-eval parens');
})) passed++; else failed++;
if (test('denies rm -rf inside { ...; } brace group', () => {
expectDestructiveDeny('{ rm -rf /tmp/junk; }', 'brace group');
})) passed++; else failed++;
if (test('denies git push --force inside plain (...) subshell group', () => {
expectDestructiveDeny('(git push --force origin main)',
'git-force in subshell');
})) passed++; else failed++;
if (test('denies git push --force inside { ...; } brace group', () => {
expectDestructiveDeny('{ git push --force origin main; }',
'git-force in brace group');
})) passed++; else failed++;
if (test('denies rm -rf nested across () and {} (cross-syntax)', () => {
expectDestructiveDeny('(echo y; { rm -rf /tmp/junk; })',
'() containing {} cross-syntax');
})) passed++; else failed++;
if (test('denies rm -rf nested across $() and () (cross-syntax)', () => {
expectDestructiveDeny('$(echo y; (rm -rf /tmp/junk))',
'$() containing () cross-syntax');
})) passed++; else failed++;
// Negative cases — literals and non-destructive commands must NOT
// be promoted to destructive by the new grouping-body walker.
if (test('allows literal (rm -rf ...) inside single quotes', () => {
expectAllow("git commit -m '(rm -rf /tmp/junk)'",
'single-quoted subshell literal');
})) passed++; else failed++;
if (test('allows literal (rm -rf ...) inside double quotes', () => {
expectAllow('echo "(rm -rf /tmp/junk)"',
'double-quoted subshell literal');
})) passed++; else failed++;
if (test('allows literal { rm -rf ...; } inside double quotes', () => {
expectAllow('echo "{ rm -rf /tmp/junk; }"',
'double-quoted brace-group literal');
})) passed++; else failed++;
if (test('allows non-destructive (echo hello)', () => {
expectAllow('(echo hello)', 'non-destructive subshell');
})) passed++; else failed++;
if (test('allows non-destructive { echo hello; }', () => {
expectAllow('{ echo hello; }', 'non-destructive brace group');
})) passed++; else failed++;
if (test('allows {rm -rf} — no space after { is not a brace group', () => {
// bash treats `{rm` as a single token; no destructive intent
// can be statically derived from this form, and the command
// would not actually run rm at runtime either.
expectAllow('echo {rm -rf /tmp/junk}',
'no-space brace literal');
})) passed++; else failed++;
// --- Round 1 review fixes: brace-group span-skip + boundary ---
// Verifies the body-accumulation loop in `extractBraceGroups`
// correctly walks past `$(...)`, `(...)`, and backtick spans so
// a `}` inside one of those does not terminate the brace group
// early, plus the nested `{` boundary rule.
if (test('denies rm -rf in brace group with backtick containing }', () => {
expectDestructiveDeny('{ echo `echo }`; rm -rf /tmp/junk; }',
'brace + backtick containing }');
})) passed++; else failed++;
if (test('denies rm -rf in brace group with $() containing }', () => {
expectDestructiveDeny('{ echo $(echo "}"); rm -rf /tmp/junk; }',
'brace + $() containing }');
})) passed++; else failed++;
if (test('denies rm -rf in brace group with nested () containing }', () => {
expectDestructiveDeny('{ (echo "}"); rm -rf /tmp/junk; }',
'brace + () containing }');
})) passed++; else failed++;
if (test('denies rm -rf in brace group with $() body containing }', () => {
expectDestructiveDeny('{ x=$(echo a}b); rm -rf /tmp/junk; }',
'brace + $() body with }');
})) passed++; else failed++;
if (test('denies rm -rf when token like foo{ appears before brace group close', () => {
// tokens like `foo{` are not reserved-word `{` (no boundary,
// no whitespace after) — must not bump nested-depth and so
// must not delay brace-group close
expectDestructiveDeny('{ echo foo{bar; rm -rf /tmp/junk; }',
'foo{ token inside brace body');
})) passed++; else failed++;
// Cleanup only the temp directory created by this test file.
try {
if (fs.existsSync(stateDir)) {