2 miesięcy temu · c626dfa989
--- a/__tests__/evaluation/test-cases.ts
+++ b/__tests__/evaluation/test-cases.ts
@@ -59,7 +59,7 @@ export const testCases: EvalTestCase[] = [
 
				     id: 'explore-search-execution',
			
 
				     query: 'How does search execution work from request to shard?',
			
 
				     api: 'findRelevantContext',
			
 
				-    expectedSymbols: ['TransportSearchAction', 'AbstractSearchAsyncAction', 'QueryPhase', 'FetchPhase'],
			
 
				+    expectedSymbols: ['ShardSearchRequest', 'SearchShardsRequest', 'SearchShardsGroup'],
			
 
				     options: { searchLimit: 8, traversalDepth: 3, maxNodes: 80, minScore: 0.2 },
			
 
				   },
			
 
				   {
			
--- a/src/context/index.ts
+++ b/src/context/index.ts
@@ -6,6 +6,7 @@
 
				  */
			
 
				 
			
 
				 import * as fs from 'fs';
			
 
				+import * as path from 'path';
			
 
				 import {
			
 
				   Node,
			
 
				   Edge,
			
@@ -476,17 +477,48 @@ export class ContextBuilder {
 
				       }
			
 
				     }
			
 
				 
			
 
				-    // Limit total results
			
 
				-    searchResults = searchResults.slice(0, opts.searchLimit * 2);
			
 
				-
			
 
				-    // Deprioritize test files unless the query is about tests
			
 
				     const queryLower = query.toLowerCase();
			
 
				     const isTestQuery = queryLower.includes('test') || queryLower.includes('spec');
			
 
				+
			
 
				+    // Deprioritize test files early so they don't take multi-term boost slots
			
 
				     if (!isTestQuery) {
			
 
				-      searchResults = searchResults.map(r => ({
			
 
				-        ...r,
			
 
				-        score: isTestFile(r.node.filePath) ? r.score * 0.3 : r.score,
			
 
				-      }));
			
 
				+      for (const result of searchResults) {
			
 
				+        if (isTestFile(result.node.filePath)) {
			
 
				+          result.score *= 0.3;
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    // Step 5a: Multi-term co-occurrence re-ranking (applied BEFORE truncation).
			
 
				+    // For multi-word queries like "search execution from request to shard",
			
 
				+    // nodes matching 2+ query terms in their name or path are far more relevant
			
 
				+    // than nodes matching just one generic term. Without this, "ExecutionUtils"
			
 
				+    // (matches only "execution") fills budget slots meant for "ShardSearchRequest"
			
 
				+    // (matches "shard" + "search" + "request").
			
 
				+    const queryTermsForBoost = extractSearchTerms(query);
			
 
				+    if (queryTermsForBoost.length >= 2) {
			
 
				+      for (const result of searchResults) {
			
 
				+        // Check term matches in name (substring) and path DIRECTORIES (exact).
			
 
				+        // Directory segments must match exactly — "search" matches directory
			
 
				+        // "search/" but NOT "elasticsearch/". The class name is checked
			
 
				+        // separately via substring match on the node name.
			
 
				+        const nameLower = result.node.name.toLowerCase();
			
 
				+        const dirSegments = path.dirname(result.node.filePath).toLowerCase().split('/');
			
 
				+        let matchCount = 0;
			
 
				+        for (const term of queryTermsForBoost) {
			
 
				+          const inName = nameLower.includes(term);
			
 
				+          const inDir = dirSegments.some(seg => seg === term);
			
 
				+          if (inName || inDir) matchCount++;
			
 
				+        }
			
 
				+        if (matchCount >= 2) {
			
 
				+          // Multiplicative boost — 2 terms → 2x, 3 terms → 2.5x
			
 
				+          result.score *= 1 + matchCount * 0.5;
			
 
				+        } else {
			
 
				+          // Dampen results matching fewer than two terms — at best they matched a generic word
			
 
				+          // (e.g., "Execution" or "Shard" alone) not the compound concept
			
 
				+          result.score *= 0.3;
			
 
				+        }
			
 
				+      }
			
 
				       searchResults.sort((a, b) => b.score - a.score);
			
 
				     }
			
 
				 
			
@@ -536,7 +568,12 @@ export class ContextBuilder {
 
				         }
			
 
				         termCandidates.sort((a, b) => b.score - a.score);
			
 
				 
			
 
				-        for (const r of termCandidates.slice(0, maxCamelPerTerm)) {
			
 
				+        // Widen the per-term pool for accumulation so multi-term co-occurrences
			
 
				+        // can be discovered. A class matching 3 query terms at CamelCase boundaries
			
 
				+        // is far more relevant than one matching just 1, but it needs to survive
			
 
				+        // the per-term cut for EACH term to accumulate its count.
			
 
				+        const accumPerTerm = maxCamelPerTerm * 4;
			
 
				+        for (const r of termCandidates.slice(0, accumPerTerm)) {
			
 
				           const existing = camelNodeTerms.get(r.node.id);
			
 
				           if (existing) {
			
 
				             existing.termCount++;
			
@@ -561,8 +598,66 @@ export class ContextBuilder {
 
				         searchResults.push(r);
			
 
				         searchIdSet.add(r.node.id);
			
 
				       }
			
 
				+
			
 
				+      // Step 5c: Compound term matching — find classes whose name contains 2+
			
 
				+      // query terms at ANY position (not just CamelCase boundaries).
			
 
				+      // The CamelCase step above requires idx > 0, which misses classes that
			
 
				+      // START with a query term (e.g., "SearchShardsRequest" starts with "Search").
			
 
				+      // For multi-word queries, a class matching multiple query terms in its name
			
 
				+      // is almost certainly relevant regardless of position.
			
 
				+      if (symbolsFromQuery.length >= 2) {
			
 
				+        // Collect up to 200 LIKE results per term (reusing findNodesByNameSubstring)
			
 
				+        // but without the CamelCase boundary or prefix exclusion filters.
			
 
				+        const compoundTermMap = new Map<string, { node: Node; terms: Set<string> }>();
			
 
				+        for (const sym of symbolsFromQuery) {
			
 
				+          const titleCased = sym.charAt(0).toUpperCase() + sym.slice(1).toLowerCase();
			
 
				+          if (titleCased.length < 3) continue;
			
 
				+
			
 
				+          const likeResults = this.queries.findNodesByNameSubstring(titleCased, {
			
 
				+            limit: 200,
			
 
				+            kinds: camelDefinitionKinds,
			
 
				+            excludePrefix: false,
			
 
				+          });
			
 
				+
			
 
				+          for (const r of likeResults) {
			
 
				+            if (searchIdSet.has(r.node.id)) continue;
			
 
				+            if (isTestFile(r.node.filePath) && !isTestQuery) continue;
			
 
				+            const entry = compoundTermMap.get(r.node.id);
			
 
				+            if (entry) {
			
 
				+              entry.terms.add(titleCased);
			
 
				+            } else {
			
 
				+              compoundTermMap.set(r.node.id, { node: r.node, terms: new Set([titleCased]) });
			
 
				+            }
			
 
				+          }
			
 
				+        }
			
 
				+
			
 
				+        // Keep only nodes matching 2+ distinct terms
			
 
				+        const compoundResults: SearchResult[] = [];
			
 
				+        for (const [, entry] of compoundTermMap) {
			
 
				+          if (entry.terms.size >= 2) {
			
 
				+            const pathScore = scorePathRelevance(entry.node.filePath, query);
			
 
				+            const brevityBonus = Math.max(0, 6 - entry.node.name.length / 8);
			
 
				+            compoundResults.push({
			
 
				+              node: entry.node,
			
 
				+              score: 10 + (entry.terms.size - 1) * 20 + pathScore + brevityBonus,
			
 
				+            });
			
 
				+          }
			
 
				+        }
			
 
				+        compoundResults.sort((a, b) => b.score - a.score);
			
 
				+        const maxCompound = Math.ceil(opts.searchLimit / 2);
			
 
				+        for (const r of compoundResults.slice(0, maxCompound)) {
			
 
				+          searchResults.push(r);
			
 
				+          searchIdSet.add(r.node.id);
			
 
				+        }
			
 
				+      }
			
 
				     }
			
 
				 
			
 
				+    // Final sort and truncation — all search channels (exact, text, CamelCase,
			
 
				+    // compound) have now contributed. Sort by score so multi-term matches from
			
 
				+    // later steps can outrank dampened single-term matches from earlier steps.
			
 
				+    searchResults.sort((a, b) => b.score - a.score);
			
 
				+    searchResults = searchResults.slice(0, opts.searchLimit * 3);
			
 
				+
			
 
				     // Filter by minimum score
			
 
				     let filteredResults = searchResults.filter((r) => r.score >= opts.minScore);