explore-corroboration-ranking.test.ts 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. /**
  2. * codegraph_explore — multi-term corroboration tier (cross-layer monorepo ranking).
  3. *
  4. * BEHAVIOURAL coverage for the `isCorroborated` tier in handleExplore's file sort:
  5. * a backend file that is BOTH an entry/central file AND matched by >=2 DISTINCT
  6. * query terms must be surfaced (rendered as a bold ``**`<path>`** —`` source section) for a
  7. * backend-flow query in a multi-layer repo — not displaced by a denser frontend
  8. * layer. The tier exists because explore's primary file ranker is graph-centrality
  9. * (Random-Walk-with-Restart) mass, which — seeded from text matches that skew to
  10. * the bigger, internally dense layer — can bury a query-matching backend file under
  11. * an off-topic cluster. The entry/central GUARD keeps the tier safe: an INCIDENTAL
  12. * multi-term file that is neither entry nor central is NOT promoted, so it cannot
  13. * displace a graph-central answer file (the regression a blunt hits-only tier caused
  14. * on excalidraw, where `binding.ts`/`elbowArrow.ts` displaced `renderNewElementScene`).
  15. *
  16. * NOTE: the full directus-scale burial (where frontend RWR mass exceeds a
  17. * query-matching backend file) is an EMERGENT property of thousands of real frontend
  18. * symbols — a self-contained fixture can't reach the cluster size past
  19. * findRelevantContext's retrieval cap. That regression is isolated by the
  20. * deterministic ranking harness on real indexes (directus/n8n/excalidraw), where the
  21. * api/ service moves from "absent/mentioned" to "sourced" with no control regression.
  22. * These tests lock the user-visible behaviour the tier guarantees on a fixture.
  23. */
  24. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  25. import * as fs from 'fs';
  26. import * as path from 'path';
  27. import * as os from 'os';
  28. import CodeGraph from '../src/index';
  29. import { ToolHandler } from '../src/mcp/tools';
  30. /** Paths that explore rendered as full-body ``**`<path>`** —`` source sections.
  31. * Headers are bold labels, not ATX headings (issue #778). */
  32. function sourcedFiles(text: string): string[] {
  33. const out: string[] = [];
  34. for (const line of text.split('\n')) {
  35. const m = line.match(/^\*\*`(.+?)`\*\* —/);
  36. if (m) out.push(m[1].trim());
  37. }
  38. return out;
  39. }
  40. describe('codegraph_explore — multi-term corroboration tier', () => {
  41. let testDir: string;
  42. let cg: CodeGraph;
  43. let handler: ToolHandler;
  44. beforeEach(async () => {
  45. testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-corrob-'));
  46. // --- The large, internally DENSE frontend layer ---------------------------
  47. // Many `app/` files whose SYMBOLS all match the word "item" and form a tight
  48. // call mesh, so Random-Walk-with-Restart mass (seeded from those text matches)
  49. // concentrates here. They are NOT the answer to a backend query — but at scale
  50. // their cluster mass out-ranks the call-isolated backend file.
  51. // "item" is a PATH token (app/item/...) so FTS (token-based, not substring)
  52. // retrieves every file for the query term "item" — matching directus's `app/`
  53. // tree where "item" is a real path/symbol token, not a camelCase fragment.
  54. const appItem = path.join(testDir, 'app', 'item');
  55. fs.mkdirSync(appItem, { recursive: true });
  56. const N = 30;
  57. for (let i = 0; i < N; i++) {
  58. const next = (i + 1) % N;
  59. const prev = (i + N - 1) % N;
  60. // Each file imports two neighbours → a dense mesh of `references`/`calls`.
  61. // snake_case so FTS tokenizes "item" out of the symbol name (camelCase would
  62. // leave `itemview0` as a single unmatchable token).
  63. fs.writeFileSync(path.join(appItem, `view${i}.ts`),
  64. `import { item_view_${next} } from './view${next}';\n` +
  65. `import { item_view_${prev} } from './view${prev}';\n` +
  66. `export function item_view_${i}() {\n` +
  67. ` return item_view_${next}() + item_view_${prev}();\n` +
  68. `}\n`);
  69. }
  70. // --- The small, call-ISOLATED backend file (the answer) -------------------
  71. // Its PATH matches TWO distinct query terms (api/item/service.ts → item +
  72. // service), so it IS a search root (an entry file) with file-term-hits >=2 —
  73. // but its generic SYMBOLS don't text-match, and nothing in the frontend mesh
  74. // calls it, so it gets no RWR inflow and its restart mass is diluted across the
  75. // large frontend seed set. This is the directus shape: ItemsService is
  76. // search-relevant by name/path yet call-isolated from the frontend seed cluster,
  77. // so RWR alone buries it under the mesh. Only the corroboration tier (path/name
  78. // matches >=2 query terms AND it's an entry file) keeps it in.
  79. const apiItem = path.join(testDir, 'api', 'item');
  80. fs.mkdirSync(apiItem, { recursive: true });
  81. fs.writeFileSync(path.join(apiItem, 'service.ts'),
  82. `export class DataService {\n` +
  83. ` read() { return this.load(); }\n` +
  84. ` load(): string[] { return []; }\n` +
  85. `}\n`);
  86. cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
  87. await cg.indexAll();
  88. handler = new ToolHandler(cg);
  89. });
  90. afterEach(() => {
  91. if (cg) cg.destroy();
  92. if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true });
  93. });
  94. it('sources the corroborated backend file alongside a denser frontend cluster in a multi-layer repo', async () => {
  95. const res = await handler.execute('codegraph_explore', { query: 'item service' });
  96. const text = res.content[0].text;
  97. const sourced = sourcedFiles(text);
  98. // The backend service — matched by item+service and a search root — must
  99. // be rendered, not truncated out by the frontend mesh's graph mass.
  100. expect(sourced).toContain('api/item/service.ts');
  101. });
  102. it('still leads with the backend file when the query names its symbol directly', async () => {
  103. // A query naming the backend symbol directly: the answer is the DataService
  104. // file; the frontend mesh stays subordinate (it matches only "item").
  105. const res = await handler.execute('codegraph_explore', { query: 'DataService read load' });
  106. const text = res.content[0].text;
  107. const sourced = sourcedFiles(text);
  108. expect(sourced).toContain('api/item/service.ts');
  109. // The named backend file leads — it is not displaced by the frontend layer.
  110. expect(sourced[0]).toBe('api/item/service.ts');
  111. });
  112. });