mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-17 01:31:26 +08:00
Merge remote-tracking branch 'origin/main' into garrytan/eng-review-askuser-fix
# Conflicts:
#	test/helpers/touchfiles.ts
This commit is contained in:
@@ -136,14 +136,21 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
|
||||
// Gate-tier reviewCount-floor counterparts. Catch the May 2026 transcript
|
||||
// bug (model wrote a plan-mode plan and ExitPlanMode'd without firing any
|
||||
// review-phase AskUserQuestion). Same harness as the periodic
|
||||
// finding-count tests (runPlanSkillCounting), smaller seeds, floor=1
|
||||
// assertion. ~6 min wall time per test, ~25 min total for all four.
|
||||
// review-phase AskUserQuestion). Uses runPlanSkillFloorCheck — minimal
|
||||
// "did agent fire ANY AUQ?" observer that exits early on first non-permission
|
||||
// numbered-option render. ~1-3 min typical wall time per test, ~$2-6 total.
|
||||
'plan-eng-finding-floor': ['plan-eng-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-eng-finding-floor.test.ts'],
|
||||
'plan-ceo-finding-floor': ['plan-ceo-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-ceo-finding-floor.test.ts'],
|
||||
'plan-design-finding-floor': ['plan-design-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-design-finding-floor.test.ts'],
|
||||
'plan-devex-finding-floor': ['plan-devex-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-devex-finding-floor.test.ts'],
|
||||
'brain-privacy-gate': ['scripts/resolvers/preamble/generate-brain-sync-block.ts', 'scripts/resolvers/preamble.ts', 'bin/gstack-brain-sync', 'bin/gstack-brain-init', 'bin/gstack-config', 'test/helpers/agent-sdk-runner.ts'],
|
||||
'brain-privacy-gate': ['scripts/resolvers/preamble/generate-brain-sync-block.ts', 'scripts/resolvers/preamble.ts', 'bin/gstack-brain-sync', 'bin/gstack-artifacts-init', 'bin/gstack-config', 'test/helpers/agent-sdk-runner.ts'],
|
||||
|
||||
// /setup-gbrain Path 4 (Remote MCP) — happy + bad-token end-to-end via
|
||||
// Agent SDK. Periodic-tier per E2E_TIERS (the stub server is deterministic, but the model's choice of steps is not); fires
|
||||
// when the skill template, the verify helper, the artifacts-init helper,
|
||||
// or the detect script changes.
|
||||
'setup-gbrain-remote': ['setup-gbrain/SKILL.md.tmpl', 'bin/gstack-gbrain-mcp-verify', 'bin/gstack-artifacts-init', 'bin/gstack-gbrain-detect', 'test/helpers/agent-sdk-runner.ts'],
|
||||
'setup-gbrain-bad-token': ['setup-gbrain/SKILL.md.tmpl', 'bin/gstack-gbrain-mcp-verify', 'test/helpers/agent-sdk-runner.ts'],
|
||||
|
||||
// AskUserQuestion format regression (RECOMMENDATION + Completeness: N/10)
|
||||
// Fires when either template OR the two preamble resolvers change.
|
||||
@@ -441,6 +448,16 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
// costs ~$0.30-$0.50 per run, not needed on every commit)
|
||||
'brain-privacy-gate': 'periodic',
|
||||
|
||||
// /setup-gbrain Path 4 (Remote MCP) — periodic-tier. The stub HTTP
|
||||
// server is deterministic but the model's interpretation of "follow
|
||||
// Path 4 only" is not — assertions on which steps the model ran are
|
||||
// flaky. The deterministic gate-tier coverage for Path 4 lives in
|
||||
// test/setup-gbrain-path4-structure.test.ts (free, <200ms). These
|
||||
// E2E tests stay available for on-demand verification of the live
|
||||
// model's behavior against a stub MCP server.
|
||||
'setup-gbrain-remote': 'periodic',
|
||||
'setup-gbrain-bad-token': 'periodic',
|
||||
|
||||
// AskUserQuestion format regression — periodic (Opus 4.7 non-deterministic benchmark)
|
||||
'plan-ceo-review-format-mode': 'periodic',
|
||||
'plan-ceo-review-format-approach': 'periodic',
|
||||
|
||||
Reference in New Issue
Block a user