mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-18 02:22:04 +08:00
test(e2e): privacy-gate AskUserQuestion fires from preamble (periodic tier)
Two periodic-tier E2E tests exercising the preamble's privacy gate
end-to-end via the Agent SDK + canUseTool. Previously uncovered:
- Positive: stages a fake gbrain on PATH + gbrain_sync_mode_prompted=false
in config, runs a real skill, intercepts tool-use. Asserts the
preamble fires a 3-option AskUserQuestion matching the canonical
prose ("publish session memory" / "artifact" / "decline") and does
NOT fire a second time in the same run (idempotency within session).
- Negative: same staging but prompted=true. Asserts the gate stays
silent even with gbrain detected on the host.
Registered in test/helpers/touchfiles.ts as `brain-privacy-gate`
(periodic) with dependency tracking on generate-brain-sync-block.ts,
the three gstack-brain-* bins, gstack-config, and the Agent SDK runner.
Diff-based selection re-runs the E2E when any of those change.
Cost: ~$0.30-$0.50 per run. Only fires under EVALS=1 EVALS_TIER=periodic;
gate tier stays free.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -92,6 +92,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'plan-devex-review-plan-mode': ['plan-devex-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'scripts/resolvers/preamble.ts', 'scripts/question-registry.ts', 'scripts/one-way-doors.ts', 'test/helpers/agent-sdk-runner.ts'],
|
||||
'plan-mode-no-op': ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts'],
|
||||
'e2e-harness-audit': ['plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'test/helpers/agent-sdk-runner.ts'],
|
||||
'brain-privacy-gate': ['scripts/resolvers/preamble/generate-brain-sync-block.ts', 'scripts/resolvers/preamble.ts', 'bin/gstack-brain-sync', 'bin/gstack-brain-init', 'bin/gstack-config', 'test/helpers/agent-sdk-runner.ts'],
|
||||
|
||||
// AskUserQuestion format regression (RECOMMENDATION + Completeness: N/10)
|
||||
// Fires when either template OR the two preamble resolvers change.
|
||||
@@ -336,6 +337,10 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'plan-mode-no-op': 'gate',
|
||||
'e2e-harness-audit': 'gate',
|
||||
|
||||
// Privacy gate for gstack-brain-sync — periodic (non-deterministic LLM call,
|
||||
// costs ~$0.30-$0.50 per run, not needed on every commit)
|
||||
'brain-privacy-gate': 'periodic',
|
||||
|
||||
// AskUserQuestion format regression — periodic (Opus 4.7 non-deterministic benchmark)
|
||||
'plan-ceo-review-format-mode': 'periodic',
|
||||
'plan-ceo-review-format-approach': 'periodic',
|
||||
|
||||
Reference in New Issue
Block a user