| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295 |
- /**
- * Evaluation Tests
- *
- * Runs the evaluation suite as part of the test suite.
- * Use `npm run test:eval` to run just these tests.
- */
- import { describe, it, expect, beforeAll, afterAll } from 'vitest';
- import * as path from 'path';
- import * as fs from 'fs';
- import CodeGraph from '../../src/index';
- import type { TestCase, TestCaseResult } from './types';
- import { typescriptFixture } from './fixtures/typescript-project/ground-truth';
- import { pythonFixture } from './fixtures/python-project/ground-truth';
/**
 * Collect the lower-cased `name` of every node into a set.
 *
 * Names that differ only by case (or appear more than once) collapse into
 * a single entry.
 *
 * @param nodes - Objects carrying a `name` property.
 * @returns The set of distinct lower-cased names.
 */
function extractSymbolNames(nodes: { name: string }[]): Set<string> {
  const loweredNames = new Set<string>();
  for (const { name } of nodes) {
    loweredNames.add(name.toLowerCase());
  }
  return loweredNames;
}
/**
 * Reduce a possibly qualified symbol to a comparable key.
 *
 * Keeps only the text after the last `.` and lower-cases it. When that
 * trailing segment is empty (e.g. the symbol ends with a dot), the whole
 * symbol is lower-cased instead.
 *
 * @param symbol - A plain or dot-qualified symbol name.
 * @returns The lower-cased comparison key.
 */
function normalizeSymbol(symbol: string): string {
  // lastIndexOf yields -1 when there is no dot, so the slice starts at 0.
  const lastSegment = symbol.slice(symbol.lastIndexOf('.') + 1);
  const chosen = lastSegment === '' ? symbol : lastSegment;
  return chosen.toLowerCase();
}
/**
 * Tell whether `symbol` equals any candidate once both sides have been
 * passed through `normalizeSymbol`.
 *
 * @param symbol - The symbol to look for.
 * @param candidates - The names to compare against.
 * @returns `true` if at least one candidate normalizes to the same key.
 */
function symbolMatches(symbol: string, candidates: Set<string>): boolean {
  const wanted = normalizeSymbol(symbol);
  return Array.from(candidates).some(candidate => normalizeSymbol(candidate) === wanted);
}
/**
 * Resolve a symbol name to a graph node.
 *
 * A two-part name such as "ClassName.methodName" is handled first: the
 * method name is searched (up to 20 hits) and the first hit whose name
 * equals the method name and whose qualified name contains the class name
 * wins. If that yields nothing, or the name is not two-part, the first hit
 * of a plain search for the full symbol is used.
 *
 * @param cg - The graph to search.
 * @param targetSymbol - Plain or "Class.method" style symbol name.
 * @returns The id and name of the matched node, or `null` when nothing matches.
 */
function findTargetNode(cg: CodeGraph, targetSymbol: string): { id: string; name: string } | null {
  const segments = targetSymbol.split('.');

  if (segments.length === 2) {
    const owner = segments[0]!;
    const member = segments[1]!;
    const qualifiedHit = cg
      .searchNodes(member, { limit: 20 })
      .find(hit => hit.node.qualifiedName.includes(owner) && hit.node.name === member);
    if (qualifiedHit) {
      return { id: qualifiedHit.node.id, name: qualifiedHit.node.name };
    }
  }

  // No qualified match: take the top hit for the symbol as written.
  const [topHit] = cg.searchNodes(targetSymbol, { limit: 1 });
  return topHit ? { id: topHit.node.id, name: topHit.node.name } : null;
}
/**
 * Run a single evaluation test case against a CodeGraph and score it.
 *
 * The test case type selects how nodes are retrieved:
 * - `search`: `searchNodes` with the case's query (limit 20).
 * - `context`: `buildContext` with the case's query; nodes come from the
 *   returned subgraph when the result is an object with one.
 * - `callers` / `callees` / `impact`: the target symbol is resolved with
 *   `findTargetNode`, then `getCallers`, `getCallees`, or
 *   `getImpactRadius(id, 2)` is queried. A missing or unresolved target
 *   leaves the retrieved set empty.
 * Any other type also leaves the retrieved set empty.
 *
 * Scoring compares names after normalization (see `symbolMatches`):
 * - true positives: retrieved names matching an expected symbol;
 * - false positives: retrieved names matching an explicitly irrelevant
 *   symbol (retrieved names in neither list are ignored);
 * - false negatives: expected symbols with no retrieved match.
 *
 * @param cg - The indexed graph to query.
 * @param testCase - The case to execute.
 * @returns Metrics for the case. `contextTokens` and `executionTimeMs` are
 *   not measured here and are always reported as 0.
 */
async function runSingleTest(cg: CodeGraph, testCase: TestCase): Promise<TestCaseResult> {
  let retrievedNodes: { name: string; id: string }[] = [];

  // Shared by the callers/callees/impact cases: no symbol means no target.
  const lookupTarget = (symbol: string | undefined): { id: string; name: string } | null =>
    symbol ? findTargetNode(cg, symbol) : null;

  switch (testCase.type) {
    case 'search': {
      const results = cg.searchNodes(testCase.query, { limit: 20 });
      retrievedNodes = results.map(r => ({ name: r.node.name, id: r.node.id }));
      break;
    }
    case 'context': {
      // Use buildContext to get semantic search + graph traversal
      const context = await cg.buildContext(testCase.query, {
        maxNodes: 30,
        traversalDepth: 2,
        searchLimit: 5,
        format: 'object',
      });
      // Extract nodes from the subgraph
      if (typeof context !== 'string' && context.subgraph) {
        retrievedNodes = Array.from(context.subgraph.nodes.values()).map(n => ({
          name: n.name,
          id: n.id,
        }));
      }
      break;
    }
    case 'callers': {
      const targetNode = lookupTarget(testCase.targetSymbol);
      if (targetNode) {
        const callers = cg.getCallers(targetNode.id);
        retrievedNodes = callers.map(c => ({ name: c.node.name, id: c.node.id }));
      }
      break;
    }
    case 'callees': {
      const targetNode = lookupTarget(testCase.targetSymbol);
      if (targetNode) {
        const callees = cg.getCallees(targetNode.id);
        retrievedNodes = callees.map(c => ({ name: c.node.name, id: c.node.id }));
      }
      break;
    }
    case 'impact': {
      const targetNode = lookupTarget(testCase.targetSymbol);
      if (targetNode) {
        const impact = cg.getImpactRadius(targetNode.id, 2);
        retrievedNodes = Array.from(impact.nodes.values()).map(n => ({ name: n.name, id: n.id }));
      }
      break;
    }
  }

  // Calculate metrics
  const retrievedSymbols = extractSymbolNames(retrievedNodes);
  // Built once: these sets do not change while classifying retrieved names.
  const expectedSet = new Set(testCase.expectedSymbols);
  const irrelevantSet = new Set(testCase.irrelevantSymbols);

  const truePositives: string[] = [];
  const falsePositives: string[] = [];
  for (const symbol of retrievedSymbols) {
    if (symbolMatches(symbol, expectedSet)) {
      truePositives.push(symbol);
    } else if (symbolMatches(symbol, irrelevantSet)) {
      falsePositives.push(symbol);
    }
  }

  const falseNegatives: string[] = [];
  for (const expected of testCase.expectedSymbols) {
    if (!symbolMatches(expected, retrievedSymbols)) {
      falseNegatives.push(expected);
    }
  }

  const totalRetrieved = truePositives.length + falsePositives.length;
  const precision = totalRetrieved > 0 ? truePositives.length / totalRetrieved : 0;

  // Recall counts expected symbols that were found, not retrieved names that
  // matched: normalization can map several names onto one key, and counting
  // on the expected side keeps recall within [0, 1] and consistent with
  // `falseNegatives`.
  const totalRelevant = testCase.expectedSymbols.length;
  const foundRelevant = totalRelevant - falseNegatives.length;
  const recall = totalRelevant > 0 ? foundRelevant / totalRelevant : 0;

  const f1Score = precision + recall > 0
    ? 2 * (precision * recall) / (precision + recall)
    : 0;

  // Check if passed thresholds (with 20% margin); an unset or zero
  // threshold always passes.
  const passedRecall = !testCase.minRecall || recall >= testCase.minRecall * 0.8;
  const passedPrecision = !testCase.minPrecision || precision >= testCase.minPrecision * 0.8;

  return {
    testCaseId: testCase.id,
    passed: passedRecall && passedPrecision,
    precision,
    recall,
    f1Score,
    truePositives,
    falsePositives,
    falseNegatives,
    contextTokens: 0,
    executionTimeMs: 0,
  };
}
/**
 * Print a per-test results table followed by an AVERAGE row to the console.
 *
 * Each row shows the test id, a type column taken from the second
 * `-`-separated segment of the id (blank when absent), precision, recall
 * and F1 as whole percentages, and a pass/fail mark. The AVERAGE row shows
 * the mean precision, recall and F1 plus the pass rate.
 *
 * An empty `results` array prints the header and an AVERAGE row of 0%
 * values instead of `NaN%`.
 *
 * @param results - The collected test case results.
 * @param fixtureName - Label used in the table heading.
 */
function printResultsTable(results: TestCaseResult[], fixtureName: string): void {
  const asPercent = (ratio: number): string => `${(ratio * 100).toFixed(0)}%`;
  // Guard the division so an empty run averages to 0 rather than NaN.
  const mean = (pick: (r: TestCaseResult) => number): number =>
    results.length > 0
      ? results.reduce((sum, r) => sum + pick(r), 0) / results.length
      : 0;

  console.log(`\n${'='.repeat(80)}`);
  console.log(` ${fixtureName} Results`);
  console.log('='.repeat(80));
  console.log('');
  console.log(' Test ID Type Prec Recall F1 Status');
  console.log(' ' + '-'.repeat(76));

  for (const r of results) {
    const id = r.testCaseId.padEnd(35);
    const type = (r.testCaseId.split('-')[1] ?? '').padEnd(10);
    const prec = asPercent(r.precision).padStart(5);
    const recall = asPercent(r.recall).padStart(6);
    const f1 = asPercent(r.f1Score).padStart(5);
    const status = r.passed ? '✓' : '✗';
    console.log(` ${id} ${type} ${prec} ${recall} ${f1} ${status}`);
  }

  const avgPrecision = mean(r => r.precision);
  const avgRecall = mean(r => r.recall);
  const avgF1 = mean(r => r.f1Score);
  const passRate = mean(r => (r.passed ? 1 : 0));

  console.log(' ' + '-'.repeat(76));
  console.log(` ${'AVERAGE'.padEnd(35)} ${''.padEnd(10)} ${asPercent(avgPrecision).padStart(5)} ${asPercent(avgRecall).padStart(6)} ${asPercent(avgF1).padStart(5)} ${asPercent(passRate)}`);
  console.log('');
}
describe('CodeGraph Evaluation', () => {
  /**
   * Register the suite for one fixture project.
   *
   * The suite removes any existing `.codegraph` directory under the fixture,
   * indexes the fixture before the tests run, checks the indexed file count,
   * runs every ground-truth test case while only collecting its metrics, and
   * finally prints the results table and destroys the graph.
   */
  const registerFixtureSuite = (
    suiteName: string,
    tableName: string,
    projectDir: string,
    fixture: typeof typescriptFixture | typeof pythonFixture,
  ): void => {
    describe(suiteName, () => {
      let cg: CodeGraph;
      const fixturePath = path.resolve(__dirname, projectDir);
      const results: TestCaseResult[] = [];

      beforeAll(async () => {
        // Start from a clean slate: drop any index left by a previous run
        const staleIndexDir = path.join(fixturePath, '.codegraph');
        if (fs.existsSync(staleIndexDir)) {
          fs.rmSync(staleIndexDir, { recursive: true });
        }

        // Initialize and index
        cg = await CodeGraph.init(fixturePath, { index: true });
      }, 60000);

      afterAll(() => {
        // Summary table goes out once every case has reported
        printResultsTable(results, tableName);
        if (cg) {
          cg.destroy();
        }
      });

      it('should index all files', () => {
        const { fileCount } = cg.getStats();
        expect(fileCount).toBeGreaterThanOrEqual(fixture.totalFiles);
      });

      // One test per ground-truth case; metrics are recorded, never enforced
      for (const testCase of fixture.testCases) {
        it(`${testCase.id}: ${testCase.description}`, async () => {
          results.push(await runSingleTest(cg, testCase));

          // Intentionally trivial assertion: these cases only gather metrics
          expect(true).toBe(true);
        });
      }
    });
  };

  registerFixtureSuite('TypeScript Fixture', 'TypeScript', 'fixtures/typescript-project', typescriptFixture);
  registerFixtureSuite('Python Fixture', 'Python', 'fixtures/python-project', pythonFixture);
});
|