mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-19 10:52:28 +08:00
feat: slop-diff shows only NEW findings introduced on this branch
Runs slop-scan on HEAD and the merge-base, diffs results with line-number-insensitive fingerprinting so shifted code doesn't create false positives. Uses git worktree for clean base comparison. Shows net new vs removed findings. Runs automatically after bun test. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -20,7 +20,8 @@ bun run dev:skill # watch mode: auto-regen + validate on change
|
|||||||
bun run eval:list # list all eval runs from ~/.gstack-dev/evals/
|
bun run eval:list # list all eval runs from ~/.gstack-dev/evals/
|
||||||
bun run eval:compare # compare two eval runs (auto-picks most recent)
|
bun run eval:compare # compare two eval runs (auto-picks most recent)
|
||||||
bun run eval:summary # aggregate stats across all eval runs
|
bun run eval:summary # aggregate stats across all eval runs
|
||||||
bun run slop # slop-scan diagnostic (not a gate, just a number)
|
bun run slop # full slop-scan report (all files)
|
||||||
|
bun run slop:diff # slop findings in files changed on this branch only
|
||||||
```
|
```
|
||||||
|
|
||||||
`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`)
|
`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
"gen:skill-docs": "bun run scripts/gen-skill-docs.ts",
|
"gen:skill-docs": "bun run scripts/gen-skill-docs.ts",
|
||||||
"dev": "bun run browse/src/cli.ts",
|
"dev": "bun run browse/src/cli.ts",
|
||||||
"server": "bun run browse/src/server.ts",
|
"server": "bun run browse/src/server.ts",
|
||||||
"test": "bun test browse/test/ test/ --ignore 'test/skill-e2e-*.test.ts' --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts --ignore test/codex-e2e.test.ts --ignore test/gemini-e2e.test.ts && echo '── slop-scan (diagnostic) ──' && bun run slop 2>/dev/null | tail -8 || true",
|
"test": "bun test browse/test/ test/ --ignore 'test/skill-e2e-*.test.ts' --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts --ignore test/codex-e2e.test.ts --ignore test/gemini-e2e.test.ts && bun run slop:diff 2>/dev/null || true",
|
||||||
"test:evals": "EVALS=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-llm-eval.test.ts test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
"test:evals": "EVALS=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-llm-eval.test.ts test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
||||||
"test:evals:all": "EVALS=1 EVALS_ALL=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-llm-eval.test.ts test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
"test:evals:all": "EVALS=1 EVALS_ALL=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-llm-eval.test.ts test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
||||||
"test:e2e": "EVALS=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
"test:e2e": "EVALS=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
||||||
@@ -34,7 +34,8 @@
|
|||||||
"eval:select": "bun run scripts/eval-select.ts",
|
"eval:select": "bun run scripts/eval-select.ts",
|
||||||
"analytics": "bun run scripts/analytics.ts",
|
"analytics": "bun run scripts/analytics.ts",
|
||||||
"test:audit": "bun test test/audit-compliance.test.ts",
|
"test:audit": "bun test test/audit-compliance.test.ts",
|
||||||
"slop": "npx slop-scan scan . 2>/dev/null || echo 'slop-scan not available (install with: npm i -g slop-scan)'"
|
"slop": "npx slop-scan scan . 2>/dev/null || echo 'slop-scan not available (install with: npm i -g slop-scan)'",
|
||||||
|
"slop:diff": "bun run scripts/slop-diff.ts"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ngrok/ngrok": "^1.7.0",
|
"@ngrok/ngrok": "^1.7.0",
|
||||||
|
|||||||
170
scripts/slop-diff.ts
Normal file
170
scripts/slop-diff.ts
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
#!/usr/bin/env bun
|
||||||
|
/**
|
||||||
|
* slop-diff: show NEW slop-scan findings introduced on this branch.
|
||||||
|
*
|
||||||
|
* Runs slop-scan on HEAD and on the merge-base, then diffs the results
|
||||||
|
* to show only findings that were added. Line-number-insensitive comparison
|
||||||
|
* so shifting code doesn't create false positives.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* bun run slop:diff # diff against main
|
||||||
|
* bun run slop:diff origin/release # diff against another base
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { spawnSync } from 'child_process';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as os from 'os';
|
||||||
|
import * as path from 'path';
|
||||||
|
|
||||||
|
const base = process.argv[2] || 'main';
|
||||||
|
|
||||||
|
// 1. Find changed files
|
||||||
|
const diffResult = spawnSync('git', ['diff', '--name-only', `${base}...HEAD`], {
|
||||||
|
encoding: 'utf-8', timeout: 10000,
|
||||||
|
});
|
||||||
|
const changedFiles = new Set(
|
||||||
|
(diffResult.stdout || '').trim().split('\n').filter(Boolean)
|
||||||
|
);
|
||||||
|
if (changedFiles.size === 0) {
|
||||||
|
console.log('No files changed vs', base, '— nothing to check.');
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Run slop-scan on HEAD
|
||||||
|
const scanHead = spawnSync('npx', ['slop-scan', 'scan', '.', '--json'], {
|
||||||
|
encoding: 'utf-8', timeout: 120000, shell: true,
|
||||||
|
});
|
||||||
|
if (!scanHead.stdout) {
|
||||||
|
console.log('slop-scan not available. Install: npm i -g slop-scan');
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
let headReport: any;
|
||||||
|
try { headReport = JSON.parse(scanHead.stdout); } catch {
|
||||||
|
console.log('slop-scan returned invalid JSON.'); process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Get base branch findings using git stash approach
|
||||||
|
// Check out base versions of changed files, scan, then restore
|
||||||
|
const mergeBase = spawnSync('git', ['merge-base', base, 'HEAD'], {
|
||||||
|
encoding: 'utf-8', timeout: 5000,
|
||||||
|
}).stdout?.trim();
|
||||||
|
|
||||||
|
// Fingerprint: strip line numbers so shifting code doesn't create false positives
|
||||||
|
// "line 142: empty catch, boundary=none" -> "empty catch, boundary=none"
|
||||||
|
function stripLineNum(evidence: string): string {
|
||||||
|
return evidence.replace(/^line \d+: /, '').replace(/ at line \d+ /, ' ');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count evidence items per (rule, file, stripped-evidence) for the base
|
||||||
|
const baseCounts = new Map<string, number>();
|
||||||
|
|
||||||
|
if (mergeBase) {
|
||||||
|
// Create temp worktree for base scan
|
||||||
|
const tmpWorktree = path.join(os.tmpdir(), `slop-base-${Date.now()}`);
|
||||||
|
const wtResult = spawnSync('git', ['worktree', 'add', '--detach', tmpWorktree, mergeBase], {
|
||||||
|
encoding: 'utf-8', timeout: 30000,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (wtResult.status === 0) {
|
||||||
|
// Copy slop-scan config if it exists
|
||||||
|
const configFile = 'slop-scan.config.json';
|
||||||
|
if (fs.existsSync(configFile)) {
|
||||||
|
try { fs.copyFileSync(configFile, path.join(tmpWorktree, configFile)); } catch {}
|
||||||
|
}
|
||||||
|
|
||||||
|
const scanBase = spawnSync('npx', ['slop-scan', 'scan', tmpWorktree, '--json'], {
|
||||||
|
encoding: 'utf-8', timeout: 120000, shell: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (scanBase.stdout) {
|
||||||
|
try {
|
||||||
|
const baseReport = JSON.parse(scanBase.stdout);
|
||||||
|
for (const f of baseReport.findings) {
|
||||||
|
// Remap worktree paths back to repo-relative
|
||||||
|
const realPath = f.path.replace(tmpWorktree + '/', '');
|
||||||
|
if (!changedFiles.has(realPath)) continue;
|
||||||
|
for (const ev of f.evidence || []) {
|
||||||
|
const key = `${f.ruleId}|${realPath}|${stripLineNum(ev)}`;
|
||||||
|
baseCounts.set(key, (baseCounts.get(key) || 0) + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up worktree
|
||||||
|
spawnSync('git', ['worktree', 'remove', '--force', tmpWorktree], {
|
||||||
|
timeout: 10000,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Find genuinely new findings
|
||||||
|
// For each evidence item on HEAD, check if the base had the same (rule, file, stripped-evidence).
|
||||||
|
// Use counts to handle duplicates: if base had 2 and HEAD has 3, that's 1 new.
|
||||||
|
const headCounts = new Map<string, { count: number; evidence: string[] }>();
|
||||||
|
const headFindings = headReport.findings.filter((f: any) => changedFiles.has(f.path));
|
||||||
|
|
||||||
|
for (const f of headFindings) {
|
||||||
|
for (const ev of f.evidence || []) {
|
||||||
|
const key = `${f.ruleId}|${f.path}|${stripLineNum(ev)}`;
|
||||||
|
const entry = headCounts.get(key) || { count: 0, evidence: [] };
|
||||||
|
entry.count++;
|
||||||
|
entry.evidence.push(ev);
|
||||||
|
headCounts.set(key, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute net new
|
||||||
|
type NewFinding = { ruleId: string; filePath: string; evidence: string };
|
||||||
|
const newFindings: NewFinding[] = [];
|
||||||
|
let removedCount = 0;
|
||||||
|
|
||||||
|
for (const [key, entry] of headCounts) {
|
||||||
|
const baseCount = baseCounts.get(key) || 0;
|
||||||
|
const netNew = entry.count - baseCount;
|
||||||
|
if (netNew > 0) {
|
||||||
|
const [ruleId, filePath] = key.split('|');
|
||||||
|
// Take the last N evidence items as the "new" ones
|
||||||
|
for (const ev of entry.evidence.slice(-netNew)) {
|
||||||
|
newFindings.push({ ruleId, filePath, evidence: ev });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const [key, baseCount] of baseCounts) {
|
||||||
|
const headCount = headCounts.get(key)?.count || 0;
|
||||||
|
if (headCount < baseCount) removedCount += baseCount - headCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Print results
|
||||||
|
if (newFindings.length === 0) {
|
||||||
|
if (removedCount > 0) {
|
||||||
|
console.log(`\n slop-scan: no new findings. Removed ${removedCount} pre-existing findings.\n`);
|
||||||
|
} else {
|
||||||
|
console.log(`\n slop-scan: no new findings in ${changedFiles.size} changed files.\n`);
|
||||||
|
}
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`\n── slop-scan: ${newFindings.length} new findings (+${newFindings.length} / -${removedCount}) ──\n`);
|
||||||
|
|
||||||
|
// Group by file, then by rule
|
||||||
|
const grouped = new Map<string, Map<string, string[]>>();
|
||||||
|
for (const { ruleId, filePath, evidence } of newFindings) {
|
||||||
|
if (!grouped.has(filePath)) grouped.set(filePath, new Map());
|
||||||
|
const rules = grouped.get(filePath)!;
|
||||||
|
if (!rules.has(ruleId)) rules.set(ruleId, []);
|
||||||
|
rules.get(ruleId)!.push(evidence);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const [filePath, rules] of grouped) {
|
||||||
|
console.log(` ${filePath}`);
|
||||||
|
for (const [ruleId, evidence] of rules) {
|
||||||
|
console.log(` ${ruleId}:`);
|
||||||
|
for (const ev of evidence) {
|
||||||
|
console.log(` ${ev}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`\n Net: +${newFindings.length} new, -${removedCount} removed\n`);
|
||||||
Reference in New Issue
Block a user