feat: Review Army — parallel specialist reviewers for /review (v0.14.3.0) (#692)

* feat: extend gstack-diff-scope with SCOPE_MIGRATIONS, SCOPE_API, SCOPE_AUTH

Three new scope signals for Review Army specialist activation:
- SCOPE_MIGRATIONS: db/migrate/, prisma/migrations/, alembic/, *.sql
- SCOPE_API: *controller*, *route*, *endpoint*, *.graphql, openapi.*
- SCOPE_AUTH: *auth*, *session*, *jwt*, *oauth*, *permission*, *role*

* feat: add 7 specialist checklist files for Review Army

- testing.md (always-on): coverage gaps, flaky patterns, security enforcement
- maintainability.md (always-on): dead code, DRY, stale comments
- security.md (conditional): OWASP deep analysis, auth bypass, injection
- performance.md (conditional): N+1 queries, bundle impact, complexity
- data-migration.md (conditional): reversibility, lock duration, backfill
- api-contract.md (conditional): breaking changes, versioning, error format
- red-team.md (conditional): adversarial analysis, cross-cutting concerns

All use standard header with JSON output schema and NO FINDINGS fallback.

* feat: Review Army resolver — parallel specialist dispatch + merge

New resolver in review-army.ts generates template prose for:
- Stack detection and specialist selection
- Parallel Agent tool dispatch with learning-informed prompts
- JSON finding collection, fingerprint dedup, consensus highlighting
- PR quality score computation
- Red Team conditional dispatch

Registered as REVIEW_ARMY in resolvers/index.ts.

* refactor: restructure /review template for Review Army

- Replace Steps 4-4.75 with CRITICAL pass + {{REVIEW_ARMY}}
- Remove {{DESIGN_REVIEW_LITE}} and {{TEST_COVERAGE_AUDIT_REVIEW}} (subsumed into Design and Testing specialists respectively)
- Extract specialist-covered categories from checklist.md
- Keep CRITICAL + uncovered INFORMATIONAL in main agent pass

* test: Review Army — 14 diff-scope tests + 7 E2E tests

- test/diff-scope.test.ts: 14 tests for all 9 scope signals
- test/skill-e2e-review-army.test.ts: 7 E2E tests
  Gate: migration safety, N+1 detection, delivery audit, quality score, JSON findings
  Periodic: red team, consensus
- Updated gen-skill-docs tests for new review structure
- Added touchfile entries and tier classifications

* docs: update SELF_LEARNING_V0.md with Release 2 status + Release 2.5

Mark Release 2 (Review Army) as in-progress. Add Release 2.5 for deferred expansions (E1 adaptive gating, E3 test stubs, E5 cross-review dedup, E7 specialist tracking).

* chore: bump version and changelog (v0.14.3.0)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-16 01:02:13 +08:00 · 2026-03-30 22:07:50 -06:00
parent a0328be04c
commit a4a181ca92
24 changed files with 1666 additions and 308 deletions
--- a/test/gen-skill-docs.test.ts
+++ b/test/gen-skill-docs.test.ts
@@ -607,7 +607,8 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
  const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
  const reviewSkill = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');

-  test('all three modes share codepath tracing methodology', () => {
+  test('plan and ship modes share codepath tracing methodology', () => {
+    // Review mode delegates test coverage to the Testing specialist subagent (Review Army)
    const sharedPhrases = [
      'Trace data flow',
      'Diagram the execution',
@@ -619,33 +620,40 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
    for (const phrase of sharedPhrases) {
      expect(planSkill).toContain(phrase);
      expect(shipSkill).toContain(phrase);
-      expect(reviewSkill).toContain(phrase);
    }
    // Plan mode traces the plan, not a git diff
    expect(planSkill).toContain('Trace every codepath in the plan');
    expect(planSkill).not.toContain('git diff origin');
-    // Ship and review modes trace the diff
+    // Ship mode traces the diff
    expect(shipSkill).toContain('Trace every codepath changed');
-    expect(reviewSkill).toContain('Trace every codepath changed');
  });

-  test('all three modes include E2E decision matrix', () => {
-    for (const skill of [planSkill, shipSkill, reviewSkill]) {
+  test('review mode uses Review Army for specialist dispatch', () => {
+    expect(reviewSkill).toContain('Review Army');
+    expect(reviewSkill).toContain('Specialist Dispatch');
+    expect(reviewSkill).toContain('testing.md');
+  });
+
+  test('plan and ship modes include E2E decision matrix', () => {
+    // Review mode delegates to Testing specialist
+    for (const skill of [planSkill, shipSkill]) {
      expect(skill).toContain('E2E Test Decision Matrix');
      expect(skill).toContain('→E2E');
      expect(skill).toContain('→EVAL');
    }
  });

-  test('all three modes include regression rule', () => {
-    for (const skill of [planSkill, shipSkill, reviewSkill]) {
+  test('plan and ship modes include regression rule', () => {
+    // Review mode delegates to Testing specialist
+    for (const skill of [planSkill, shipSkill]) {
      expect(skill).toContain('REGRESSION RULE');
      expect(skill).toContain('IRON RULE');
    }
  });

-  test('all three modes include test framework detection', () => {
-    for (const skill of [planSkill, shipSkill, reviewSkill]) {
+  test('plan and ship modes include test framework detection', () => {
+    // Review mode delegates to Testing specialist
+    for (const skill of [planSkill, shipSkill]) {
      expect(skill).toContain('Test Framework Detection');
      expect(skill).toContain('CLAUDE.md');
    }
@@ -664,11 +672,12 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
    expect(shipSkill).toContain('ship-test-plan');
  });

-  test('review mode generates via Fix-First + gaps are INFORMATIONAL', () => {
+  test('review mode uses Fix-First + Review Army for specialist coverage', () => {
    expect(reviewSkill).toContain('Fix-First');
    expect(reviewSkill).toContain('INFORMATIONAL');
-    expect(reviewSkill).toContain('Step 4.75');
-    expect(reviewSkill).toContain('subsumes the "Test Gaps" category');
+    // Review Army handles test coverage via Testing specialist subagent
+    expect(reviewSkill).toContain('Review Army');
+    expect(reviewSkill).toContain('Testing');
  });

  test('plan mode does NOT include ship-specific content', () => {
@@ -683,6 +692,35 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
    expect(reviewSkill).not.toContain('ship-test-plan');
  });

+  test('review/specialists/ directory has all expected checklist files', () => {
+    const specDir = path.join(ROOT, 'review', 'specialists');
+    const expected = [
+      'testing.md',
+      'maintainability.md',
+      'security.md',
+      'performance.md',
+      'data-migration.md',
+      'api-contract.md',
+      'red-team.md',
+    ];
+    for (const f of expected) {
+      expect(fs.existsSync(path.join(specDir, f))).toBe(true);
+    }
+  });
+
+  test('each specialist file has standard header with scope and output format', () => {
+    const specDir = path.join(ROOT, 'review', 'specialists');
+    const files = fs.readdirSync(specDir).filter(f => f.endsWith('.md'));
+    for (const f of files) {
+      const content = fs.readFileSync(path.join(specDir, f), 'utf-8');
+      // All specialist files must have Scope and Output/JSON in header
+      expect(content).toContain('Scope:');
+      expect(content.toLowerCase()).toMatch(/output|json/);
+      // Must define NO FINDINGS behavior
+      expect(content).toContain('NO FINDINGS');
+    }
+  });
+
  // Regression guard: ship output contains key phrases from before the refactor
  test('ship SKILL.md regression guard — key phrases preserved', () => {
    const regressionPhrases = [
@@ -870,12 +908,9 @@ describe('Coverage gate in ship', () => {
    expect(shipSkill).toContain('could not determine percentage — skipping');
  });

-  test('review SKILL.md contains coverage WARNING', () => {
-    expect(reviewSkill).toContain('COVERAGE WARNING');
-    expect(reviewSkill).toContain('Consider writing tests before running /ship');
-  });
-
-  test('review coverage warning is INFORMATIONAL', () => {
+  test('review SKILL.md delegates coverage to Testing specialist', () => {
+    // Coverage audit moved to Testing specialist subagent in Review Army
+    expect(reviewSkill).toContain('testing.md');
    expect(reviewSkill).toContain('INFORMATIONAL');
  });
 });
@@ -1604,10 +1639,9 @@ describe('Codex generation (--host codex)', () => {
    const content = fs.readFileSync(path.join(AGENTS_DIR, 'gstack-review', 'SKILL.md'), 'utf-8');
    // Correct: references to sidecar files use gstack/review/ path
    expect(content).toContain('.agents/skills/gstack/review/checklist.md');
-    expect(content).toContain('.agents/skills/gstack/review/design-checklist.md');
+    // design-checklist.md is now referenced via Review Army specialist (Claude only, stripped for Codex)
    // Wrong: must NOT reference gstack-review/checklist.md (file doesn't exist there)
    expect(content).not.toContain('.agents/skills/gstack-review/checklist.md');
-    expect(content).not.toContain('.agents/skills/gstack-review/design-checklist.md');
  });

  test('sidecar paths in ship skill point to gstack/review/ for pre-landing review', () => {