1
0

context-ranking.test.ts 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. /**
  2. * Context ranking: common-word precision + low-confidence handoff.
  3. *
  4. * Regression coverage for the failure where a prose query
  5. * ("capture intro onboarding screen flat object") surfaced an unrelated
  6. * constant named `FLAT` (in a download script) as a top entry point — because
  7. * the descriptive word "flat" exact-matched it and the +exact-name bonus was
  8. * exempt from single-term dampening. The fix: only distinctive identifiers earn
  9. * that exemption; an isolated common-word exact match is demoted, and a query
  10. * that resolves only to such weak matches is flagged low-confidence so the
  11. * response hands off to explore/trace instead of bluffing.
  12. */
  13. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  14. import * as fs from 'fs';
  15. import * as path from 'path';
  16. import * as os from 'os';
  17. import CodeGraph from '../src/index';
  18. import { LOW_CONFIDENCE_MARKER } from '../src/context';
  19. import { isDistinctiveIdentifier, scorePathRelevance, deriveProjectNameTokens } from '../src/search/query-utils';
  // Table-driven checks for isDistinctiveIdentifier: each case lists the
  // sample inputs and the single boolean every one of them must produce.
  describe('isDistinctiveIdentifier', () => {
    it('treats plain dictionary words as non-distinctive', () => {
      const plainWords = ['flat', 'object', 'screen', 'standing', 'capture'];
      plainWords.forEach((candidate) => {
        expect(isDistinctiveIdentifier(candidate)).toBe(false);
      });
    });

    it('treats leading-capital-only words (proper nouns / sentence start) as non-distinctive', () => {
      // Only the first letter is upper-case in these samples.
      const capitalisedWords = ['Screen', 'Zustand'];
      capitalisedWords.forEach((candidate) => {
        expect(isDistinctiveIdentifier(candidate)).toBe(false);
      });
    });

    it('treats camelCase / PascalCase / snake_case / acronyms / digits as distinctive', () => {
      // One sample per naming shape named in the test title, in that order
      // (camelCase, PascalCase, snake_case, acronym, digit-bearing).
      const identifierLike = ['setLastEmail', 'OrgUserStore', 'user_store', 'REST', 'v2'];
      identifierLike.forEach((candidate) => {
        expect(isDistinctiveIdentifier(candidate)).toBe(true);
      });
    });
  });
  38. // A single PascalCase query word (notably a project name a user naturally
  39. // includes) splits into sub-tokens that all match the SAME path segment; summed
  40. // per sub-token it boosted that path 4×, burying the rest of the query's stack
  41. // (#720). Path relevance must count each original WORD once per level, while
  42. // still splitting it for cross-convention matching.
  // Pins how scorePathRelevance(path, query) credits query words against
  // path segments: one credit per original word, sub-tokens used for matching.
  describe('scorePathRelevance per-word scoring (#720)', () => {
    it('counts a single PascalCase word once per path level, not once per sub-token', () => {
      // "SuperBizAgent" splits into super/biz/agent/superbizagent, all of which
      // hit the directory segment; it is still one concept, so the expected
      // score is a single +5 (dir), not +20.
      const score = scorePathRelevance('SuperBizAgentFrontend/app.js', 'SuperBizAgent');
      expect(score).toBe(5);
    });

    it('still splits a word so it matches across naming conventions', () => {
      // A camelCase query word has to reach a snake_case file name through
      // its sub-tokens.
      const score = scorePathRelevance('get_user_name.go', 'getUserName');
      expect(score).toBeGreaterThanOrEqual(10);
    });

    it('still credits distinct query words matching different path segments', () => {
      // "auth" (dir) and "handler" (filename) are separate concepts, so the
      // two-word query must outscore the one-word query on the same path.
      const target = 'src/auth/login_handler.go';
      const bothWords = scorePathRelevance(target, 'auth handler');
      const authOnly = scorePathRelevance(target, 'auth');
      expect(bothWords).toBeGreaterThan(authOnly);
    });
  });
  60. // The project name is context, not a discriminator: dropping it from path
  61. // scoring stops every file under a `<ProjectName>…/` tree from winning on the
  62. // name alone, so the rest of the query decides the ranking (#720).
  // Covers deriveProjectNameTokens (reading manifest files from a directory)
  // and the optional third argument of scorePathRelevance (project-name tokens).
  describe('project-name down-weighting in path relevance (#720)', () => {
    // Fresh token set per test so no case can observe another's Set instance.
    const projectTokens = () => new Set(['superbizagent']);
    const frontendPath = 'SuperBizAgentFrontend/app.js';

    it('derives the project name from go.mod / package.json, skipping short names', () => {
      const scratch = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-projname-'));
      try {
        const goModBody = 'module example.com/SuperBizAgent\n\ngo 1.21\n';
        const packageJsonBody = JSON.stringify({ name: '@acme/superbizagent-web' });
        fs.writeFileSync(path.join(scratch, 'go.mod'), goModBody);
        fs.writeFileSync(path.join(scratch, 'package.json'), packageJsonBody);

        const tokens = deriveProjectNameTokens(scratch);

        // One token expected from each manifest, lower-cased and without
        // the scope / punctuation present in the package.json name.
        for (const expected of ['superbizagent', 'superbizagentweb']) {
          expect(tokens.has(expected)).toBe(true);
        }
      } finally {
        // Remove the scratch directory whether or not the assertions passed.
        fs.rmSync(scratch, { recursive: true, force: true });
      }
    });

    it('drops a project-name query word from path scoring when other words remain', () => {
      const query = 'SuperBizAgent backend';
      // Without the project name dropped, the frontend path wins on it (+5).
      // With it dropped, only "backend" is left — and it doesn't match this path.
      const withDrop = scorePathRelevance(frontendPath, query, projectTokens());
      const noDrop = scorePathRelevance(frontendPath, query);
      expect(withDrop).toBeLessThan(noDrop);
      expect(withDrop).toBe(0);
    });

    it('keeps the project-name word when it is the ONLY query word (bare query still scores)', () => {
      const score = scorePathRelevance(frontendPath, 'SuperBizAgent', projectTokens());
      expect(score).toBe(5);
    });

    it('does not affect a query that omits the project name', () => {
      const controllerPath = 'internal/controller/chat/chat.go';
      const query = 'controller chat';
      const scoredWithTokens = scorePathRelevance(controllerPath, query, projectTokens());
      const scoredWithoutTokens = scorePathRelevance(controllerPath, query);
      expect(scoredWithTokens).toBe(scoredWithoutTokens);
    });
  });
  // End-to-end ranking checks against a tiny on-disk project. Each test gets a
  // fresh temp directory holding two files: a capture-flow screen component
  // (the intended hit) and an unrelated script exporting a constant named FLAT
  // (the trap). The directory is indexed with CodeGraph before every test and
  // torn down afterwards.
  describe('Context ranking — common-word precision & confidence', () => {
    let testDir: string;
    let cg: CodeGraph;

    beforeEach(async () => {
      testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-ctxrank-'));

      // The corroborated target: a capture-flow screen whose NAME alone matches
      // three query terms (capture + intro + screen), and which lives under a
      // matching directory.
      const captureDir = path.join(testDir, 'src', 'app', 'capture');
      fs.mkdirSync(captureDir, { recursive: true });
      fs.writeFileSync(
        path.join(captureDir, 'intro.tsx'),
        `export function CaptureIntroScreen() {
// Onboarding screen shown before the user selects flat or standing object capture.
return null;
}
`
      );

      // The trap: an unrelated constant literally named FLAT, in a totally
      // different area. "flat" in a prose query exact-matches it.
      const scriptsDir = path.join(testDir, 'scripts', 'dataset');
      fs.mkdirSync(scriptsDir, { recursive: true });
      fs.writeFileSync(
        path.join(scriptsDir, 'download.ts'),
        `export const FLAT = 'freiburg_flat_dataset';
export function downloadDataset(name: string): string { return name; }
`
      );

      // Index only the TypeScript sources written above.
      cg = CodeGraph.initSync(testDir, {
        config: { include: ['**/*.ts', '**/*.tsx'], exclude: [] },
      });
      await cg.indexAll();
    });

    afterEach(() => {
      try {
        if (cg) cg.destroy();
      } finally {
        // Always remove the fixture tree, even when destroy() throws, so a
        // failing teardown does not leave temp directories behind.
        if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true });
      }
    });

    it('does not let a common-word exact match (FLAT) outrank a corroborated symbol', async () => {
      const sg = await cg.findRelevantContext(
        'capture intro onboarding screen flat object'
      );
      // Resolve each root id to its node name (undefined if the id is not in sg.nodes).
      const rootNames = sg.roots.map((id) => sg.nodes.get(id)?.name);

      // The corroborated capture screen surfaces as an entry point...
      expect(rootNames).toContain('CaptureIntroScreen');
      // ...and the trap constant is never the lead result (the bug we fixed).
      expect(rootNames[0]).not.toBe('FLAT');

      // FLAT may legitimately be absent; when present it must rank below the screen.
      const capIdx = rootNames.indexOf('CaptureIntroScreen');
      const flatIdx = rootNames.indexOf('FLAT');
      if (flatIdx >= 0) expect(capIdx).toBeLessThan(flatIdx);

      // And it's confidently answered (we located a corroborated symbol).
      expect(sg.confidence).toBe('high');
    });

    it('flags low confidence and emits the handoff when only common words match', async () => {
      const query = 'flat object thing';
      const sg = await cg.findRelevantContext(query);
      expect(sg.confidence).toBe('low');

      const md = await cg.buildContext(query, { format: 'markdown' });
      // Check the runtime type first; the casts below rely on it being a string.
      expect(typeof md).toBe('string');
      expect(md as string).toContain(LOW_CONFIDENCE_MARKER);
      // The handoff routes to the precise tools rather than claiming completeness.
      expect(md as string).toMatch(/codegraph_explore/);
    });

    it('does not emit the handoff for a precise, distinctive-symbol query', async () => {
      const sg = await cg.findRelevantContext('CaptureIntroScreen');
      expect(sg.confidence).toBe('high');

      const md = await cg.buildContext('CaptureIntroScreen', { format: 'markdown' });
      expect(md as string).not.toContain(LOW_CONFIDENCE_MARKER);
    });
  });