// runner.ts
  1. import { execSync } from 'child_process';
  2. import * as fs from 'fs';
  3. import * as path from 'path';
  4. import { CodeGraph } from '../../src/index.js';
  5. import { scoreSearchNodes, scoreFindRelevantContext } from './scoring.js';
  6. import { testCases } from './test-cases.js';
  7. import type { EvalReport, EvalResult } from './types.js';
  // Location of the codebase under evaluation: the EVAL_CODEBASE environment
  // variable is consulted first, then the first command-line argument.
  const codebasePath = process.env.EVAL_CODEBASE || process.argv[2];
  if (!codebasePath) {
    const usageLines = [
      'Usage: EVAL_CODEBASE=/path/to/codebase npx tsx __tests__/evaluation/runner.ts',
      ' or: npx tsx __tests__/evaluation/runner.ts /path/to/codebase',
    ];
    for (const usageLine of usageLines) {
      console.error(usageLine);
    }
    process.exit(1);
  }

  // The codebase must already contain a CodeGraph database file; bail out otherwise.
  const resolvedPath = path.resolve(codebasePath);
  const databaseFile = path.join(resolvedPath, '.codegraph', 'codegraph.db');
  const hasDatabase = fs.existsSync(databaseFile);
  if (!hasDatabase) {
    console.error(`No .codegraph/codegraph.db found at ${resolvedPath}`);
    process.exit(1);
  }

  // Short git commit hash recorded in the report. Best effort: when the git
  // command fails for any reason the value simply stays 'unknown'.
  let codegraphSha = 'unknown';
  try {
    const gitOutput = execSync('git rev-parse --short HEAD', { encoding: 'utf-8' });
    codegraphSha = gitOutput.trim();
  } catch {}

  // Banner describing this evaluation run, followed by a blank separator line.
  const bannerLines = [
    `\nCodeGraph Eval — ${path.basename(resolvedPath)}`,
    `Codebase: ${resolvedPath}`,
    `Commit: ${codegraphSha}`,
    `Cases: ${testCases.length}`,
    '',
  ];
  for (const bannerLine of bannerLines) {
    console.log(bannerLine);
  }
  /**
   * Executes every entry of `testCases` against the CodeGraph opened at
   * `resolvedPath`, prints a per-case table and a summary line, writes a JSON
   * report into a `results` directory next to this file, and terminates the
   * process.
   *
   * Exit code is 1 when at least one case failed and 0 otherwise.
   * Any error thrown while running a case propagates to the caller, but the
   * graph handle is always closed first.
   */
  async function run(): Promise<void> {
    const cg = CodeGraph.openSync(resolvedPath);
    const results: EvalResult[] = [];

    try {
      for (const tc of testCases) {
        const start = performance.now();
        if (tc.api === 'searchNodes') {
          // Per-case options are spread last so they override the defaults above them.
          const searchResults = cg.searchNodes(tc.query, {
            limit: 10,
            kinds: tc.kinds,
            ...(tc.options as Record<string, unknown>),
          });
          const latency = performance.now() - start;
          results.push(scoreSearchNodes(tc.id, tc.expectedSymbols, searchResults, latency));
        } else {
          const subgraph = await cg.findRelevantContext(tc.query, {
            searchLimit: 8,
            traversalDepth: 3,
            maxNodes: 80,
            minScore: 0.2,
            ...(tc.options as Record<string, unknown>),
          });
          const latency = performance.now() - start;
          results.push(scoreFindRelevantContext(tc.id, tc.expectedSymbols, subgraph, latency));
        }
      }
    } finally {
      // Release the graph handle even when a case throws part-way through.
      cg.close();
    }

    // Arithmetic mean that yields 0 (not NaN) for an empty list, so an empty
    // case set still produces a printable summary and a valid JSON number.
    const mean = (values: number[]): number =>
      values.length > 0 ? values.reduce((sum, value) => sum + value, 0) / values.length : 0;

    // Print results table
    // Seeded with 0 so an empty result list gives width 0 rather than -Infinity.
    const maxIdLen = results.reduce((widest, r) => Math.max(widest, r.caseId.length), 0);
    for (const r of results) {
      const status = r.pass ? '\x1b[32mPASS\x1b[0m' : '\x1b[31mFAIL\x1b[0m';
      const id = r.caseId.padEnd(maxIdLen);
      const recall = `recall=${r.recall.toFixed(2)}`;
      // Results that carry an edge density show it; all others show their MRR.
      const extra =
        r.edgeDensity !== undefined
          ? `density=${r.edgeDensity.toFixed(2)}`
          : `mrr=${r.mrr.toFixed(2)}`;
      const latency = `${Math.round(r.latencyMs)}ms`;
      console.log(` ${id} ${status} ${recall} ${extra} ${latency}`);
      if (r.missedSymbols.length > 0) {
        console.log(` ${' '.repeat(maxIdLen)} missed: ${r.missedSymbols.join(', ')}`);
      }
    }

    // Summary
    const passed = results.filter((r) => r.pass).length;
    const failed = results.length - passed;
    const meanRecall = mean(results.map((r) => r.recall));
    // Only results with a positive MRR, or whose case id starts with 'search-',
    // take part in the MRR average.
    const mrrResults = results.filter((r) => r.mrr > 0 || r.caseId.startsWith('search-'));
    const meanMRR = mean(mrrResults.map((r) => r.mrr));

    console.log('');
    const summaryColor = failed === 0 ? '\x1b[32m' : '\x1b[33m';
    console.log(
      `${summaryColor}SUMMARY: ${passed}/${results.length} passed | recall=${meanRecall.toFixed(2)} | mrr=${meanMRR.toFixed(2)}\x1b[0m`
    );

    // Save JSON report
    // One timestamp feeds both the report body and its filename so the two always agree.
    const timestamp = new Date().toISOString();
    const report: EvalReport = {
      timestamp,
      codebasePath: resolvedPath,
      codegraphSha,
      summary: { total: results.length, passed, failed, meanRecall, meanMRR },
      results,
    };
    const resultsDir = path.join(__dirname, 'results');
    fs.mkdirSync(resultsDir, { recursive: true });
    // ':' and '.' are replaced with '-' in the timestamp used as the filename.
    const reportFile = path.join(resultsDir, `${timestamp.replace(/[:.]/g, '-')}.json`);
    fs.writeFileSync(reportFile, JSON.stringify(report, null, 2));
    console.log(`\nReport saved: ${reportFile}`);

    process.exit(failed > 0 ? 1 : 0);
  }
  // Script entry point: start the evaluation; if it rejects, log the rejection
  // reason to stderr and exit with status 1.
  run().catch(function reportFatalError(failure: unknown): never {
    console.error(failure);
    process.exit(1);
  });