1
0

scoring.ts 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import type { EvalResult } from './types.js';
  /** Minimum recall for a passing case: the scorers in this file set `pass` to `recall >= PASS_THRESHOLD`. */
  2. export const PASS_THRESHOLD = 0.5;
  /**
   * Scores a search result list against the symbols a test case expects.
   *
   * Names are compared case-insensitively. `recall` is the fraction of expected
   * symbols that appear anywhere in `results`. `mrr` is the reciprocal of the
   * 1-based position of the earliest result that matches any expected symbol,
   * independent of the order in which the expected symbols are listed; it is 0
   * when nothing matches. The case passes when `recall >= PASS_THRESHOLD`.
   *
   * @param caseId - Identifier copied into the returned result.
   * @param expectedSymbols - Symbol names the search is expected to surface.
   * @param results - Search hits; the array position is treated as the rank
   *   (index 0 is rank 1).
   * @param latencyMs - Latency value copied into the returned result.
   * @returns The evaluation record. `foundSymbols` and `missedSymbols` keep the
   *   original casing and order of `expectedSymbols`. With no expected symbols,
   *   `recall` is 0.
   */
  export function scoreSearchNodes(
    caseId: string,
    expectedSymbols: string[],
    results: Array<{ node: { name: string }; score: number }>,
    latencyMs: number
  ): EvalResult {
    // Lower-cased result name -> 1-based rank of its first occurrence.
    const rankByName = new Map<string, number>();
    results.forEach((result, index) => {
      const key = result.node.name.toLowerCase();
      if (!rankByName.has(key)) {
        rankByName.set(key, index + 1);
      }
    });

    const found: string[] = [];
    const missed: string[] = [];
    // Track the best (smallest) rank over every matched symbol, so that MRR does
    // not depend on which expected symbol happens to be listed first.
    let bestRank = Infinity;
    for (const symbol of expectedSymbols) {
      const rank = rankByName.get(symbol.toLowerCase());
      if (rank === undefined) {
        missed.push(symbol);
      } else {
        found.push(symbol);
        bestRank = Math.min(bestRank, rank);
      }
    }

    const recall = expectedSymbols.length > 0 ? found.length / expectedSymbols.length : 0;
    const mrr = Number.isFinite(bestRank) ? 1 / bestRank : 0;

    return {
      caseId,
      pass: recall >= PASS_THRESHOLD,
      recall,
      mrr,
      foundSymbols: found,
      missedSymbols: missed,
      latencyMs,
    };
  }
  /**
   * Scores a context subgraph against the symbols a test case expects.
   *
   * An expected symbol counts as found when some node in `subgraph.nodes` has
   * the same name, compared case-insensitively. `recall` is the fraction of
   * expected symbols found, and the case passes when `recall >= PASS_THRESHOLD`.
   * Size metrics for the subgraph are reported alongside.
   *
   * @param caseId - Identifier copied into the returned result.
   * @param expectedSymbols - Symbol names the subgraph is expected to contain.
   * @param subgraph - Subgraph to score; only the node names, node count and
   *   edge count are read (`roots` is not used).
   * @param latencyMs - Latency value copied into the returned result.
   * @returns The evaluation record. `mrr` is always 0 here. `edgeDensity` is
   *   edges per node, or 0 for a subgraph with no nodes. With no expected
   *   symbols, `recall` is 0.
   */
  export function scoreFindRelevantContext(
    caseId: string,
    expectedSymbols: string[],
    subgraph: { nodes: Map<string, { name: string }>; edges: unknown[]; roots: string[] },
    latencyMs: number
  ): EvalResult {
    // Lower-cased names of every node in the subgraph, for O(1) membership tests.
    const nodeNames = new Set<string>();
    for (const node of subgraph.nodes.values()) {
      nodeNames.add(node.name.toLowerCase());
    }

    const found: string[] = [];
    const missed: string[] = [];
    for (const symbol of expectedSymbols) {
      if (nodeNames.has(symbol.toLowerCase())) {
        found.push(symbol);
      } else {
        missed.push(symbol);
      }
    }

    const recall = expectedSymbols.length > 0 ? found.length / expectedSymbols.length : 0;
    const nodeCount = subgraph.nodes.size;
    const edgeCount = subgraph.edges.length;
    const edgeDensity = nodeCount > 0 ? edgeCount / nodeCount : 0;

    return {
      caseId,
      pass: recall >= PASS_THRESHOLD,
      recall,
      mrr: 0,
      foundSymbols: found,
      missedSymbols: missed,
      nodeCount,
      edgeCount,
      edgeDensity,
      latencyMs,
    };
  }
  71. }