// explore-corroboration-ranking.test.ts (6.1 KB)

  1. /**
  2. * codegraph_explore — multi-term corroboration tier (cross-layer monorepo ranking).
  3. *
  4. * BEHAVIOURAL coverage for the `isCorroborated` tier in handleExplore's file sort:
  5. * a backend file that is BOTH an entry/central file AND matched by >=2 DISTINCT
  6. * query terms must be surfaced (rendered as a `#### <path>` source section) for a
  7. * backend-flow query in a multi-layer repo — not displaced by a denser frontend
  8. * layer. The tier exists because explore's primary file ranker is graph-centrality
  9. * (Random-Walk-with-Restart) mass, which — seeded from text matches that skew to
  10. * the bigger, internally dense layer — can bury a query-matching backend file under
  11. * an off-topic cluster. The entry/central GUARD keeps the tier safe: an INCIDENTAL
  12. * multi-term file that is neither entry nor central is NOT promoted, so it cannot
  13. * displace a graph-central answer file (the regression a blunt hits-only tier caused
  14. * on excalidraw, where `binding.ts`/`elbowArrow.ts` displaced `renderNewElementScene`).
  15. *
  16. * NOTE: the full directus-scale burial (where frontend RWR mass exceeds a
  17. * query-matching backend file) is an EMERGENT property of thousands of real frontend
  18. * symbols — a self-contained fixture can't reach the cluster size past
  19. * findRelevantContext's retrieval cap. That regression is isolated by the
  20. * deterministic ranking harness on real indexes (directus/n8n/excalidraw), where the
  21. * api/ service moves from "absent/mentioned" to "sourced" with no control regression.
  22. * These tests lock the user-visible behaviour the tier guarantees on a fixture.
  23. */
  24. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  25. import * as fs from 'fs';
  26. import * as path from 'path';
  27. import * as os from 'os';
  28. import CodeGraph from '../src/index';
  29. import { ToolHandler } from '../src/mcp/tools';
  /**
   * Collects the file paths that an explore response rendered as full-body
   * source sections, i.e. lines that start with `#### <path> —`.
   *
   * @param text - Raw text of an explore response.
   * @returns The trimmed paths, in the order their headings appear in `text`.
   */
  function sourcedFiles(text: string): string[] {
    // Anchored at line start; the lazy group stops at the first " —" separator.
    const sectionHeading = /^#### (.+?) —/;
    return text
      .split('\n')
      .map((row) => sectionHeading.exec(row))
      .filter((hit): hit is RegExpExecArray => hit !== null)
      .map((hit) => hit[1].trim());
  }
  describe('codegraph_explore — multi-term corroboration tier', () => {
    let testDir: string;
    let cg: CodeGraph;
    let handler: ToolHandler;

    /**
     * Writes the large, internally dense frontend layer under `<root>/app/item`.
     *
     * Thirty `view<i>.ts` files are arranged in a ring: each imports and calls
     * its two neighbours, giving a tight mesh of references. The fixture relies
     * on "item" being both a path token (`app/item/...`) and a snake_case token
     * in every symbol name, so the token-based text search retrieves all of
     * these files for the query term "item"; a camelCase name would leave a
     * single fused token that does not match. These files are NOT the answer to
     * a backend query — they exist to soak up graph-centrality mass.
     */
    const writeFrontendMesh = (root: string): void => {
      const meshDir = path.join(root, 'app', 'item');
      fs.mkdirSync(meshDir, { recursive: true });
      const fileCount = 30;
      for (let idx = 0; idx < fileCount; idx += 1) {
        // Ring neighbours, wrapping around at both ends.
        const after = (idx + 1) % fileCount;
        const before = (idx + fileCount - 1) % fileCount;
        const contents = [
          `import { item_view_${after} } from './view${after}';\n`,
          `import { item_view_${before} } from './view${before}';\n`,
          `export function item_view_${idx}() {\n`,
          ` return item_view_${after}() + item_view_${before}();\n`,
          `}\n`,
        ].join('');
        fs.writeFileSync(path.join(meshDir, `view${idx}.ts`), contents);
      }
    };

    /**
     * Writes the small, call-isolated backend file (the expected answer) at
     * `<root>/api/item/service.ts`.
     *
     * Its path matches two distinct query terms (item + service), while its
     * generic symbol names do not text-match "item" and nothing in the frontend
     * mesh calls it. That mirrors the shape the corroboration tier is meant to
     * rescue: relevant by name/path, yet isolated from the frontend cluster.
     */
    const writeBackendService = (root: string): void => {
      const serviceDir = path.join(root, 'api', 'item');
      fs.mkdirSync(serviceDir, { recursive: true });
      const contents = [
        `export class DataService {\n`,
        ` read() { return this.load(); }\n`,
        ` load(): string[] { return []; }\n`,
        `}\n`,
      ].join('');
      fs.writeFileSync(path.join(serviceDir, 'service.ts'), contents);
    };

    /** Runs `codegraph_explore` for `query` and returns the paths it sourced. */
    const exploreSourced = async (query: string): Promise<string[]> => {
      const res = await handler.execute('codegraph_explore', { query });
      return sourcedFiles(res.content[0].text);
    };

    beforeEach(async () => {
      // Fresh temp repo per test: frontend mesh first, then the backend file.
      testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-corrob-'));
      writeFrontendMesh(testDir);
      writeBackendService(testDir);

      cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
      await cg.indexAll();
      handler = new ToolHandler(cg);
    });

    afterEach(() => {
      if (cg) {
        cg.destroy();
      }
      if (fs.existsSync(testDir)) {
        fs.rmSync(testDir, { recursive: true, force: true });
      }
    });

    it('sources the corroborated backend file alongside a denser frontend cluster in a multi-layer repo', async () => {
      const rendered = await exploreSourced('item service');
      // The backend service — matched by item+service and a search root — must
      // be rendered, not truncated out by the frontend mesh's graph mass.
      expect(rendered).toContain('api/item/service.ts');
    });

    it('still leads with the backend file when the query names its symbol directly', async () => {
      // The query names the backend symbols directly, so the DataService file is
      // the answer; the frontend mesh stays subordinate (it matches only "item").
      const rendered = await exploreSourced('DataService read load');
      expect(rendered).toContain('api/item/service.ts');
      // The named backend file leads — it is not displaced by the frontend layer.
      expect(rendered[0]).toBe('api/item/service.ts');
    });
  });