Explorar o código

feat: Improve exact name match scoring and expand stop word filtering

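Supplemented exact-name matches were previously added with a score of 0; they now use the maximum FTS (BM25) score among the existing results as their baseline, so that the nameMatchBonus (exact vs. prefix) actually differentiates them during rescoring. Also raises the exact-match lookup limit from 5 to 20 candidates per query term, and adds common conversational terms to the stop-word list to reduce query noise.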
Colby McHenry hai 2 meses
pai
achega
630053f3a3
Modificáronse 2 ficheiros con 11 adicións e 5 borrados
  1. 7 4
      src/db/queries.ts
  2. 4 1
      src/search/query-utils.ts

+ 7 - 4
src/db/queries.ts

@@ -489,11 +489,14 @@ export class QueryBuilder {
     }
 
     // Supplement: ensure exact name matches are always candidates.
-    // BM25 can bury short exact-match names (e.g. "Query") under hundreds of
-    // compound names (e.g. "QueryParserTokenManager") in large codebases,
+    // BM25 can bury short exact-match names (e.g. "getBean") under hundreds of
+    // compound names (e.g. "getBeanDescriptor") in large codebases,
     // pushing them past the FTS fetch limit before post-hoc scoring can help.
+    // Use the max BM25 score as the base so the nameMatchBonus (exact=30 vs
+    // prefix=20) actually differentiates them after rescoring.
     if (results.length > 0 && query) {
       const existingIds = new Set(results.map(r => r.node.id));
+      const maxFtsScore = Math.max(...results.map(r => r.score));
       const terms = query.split(/\s+/).filter(t => t.length >= 2);
       for (const term of terms) {
         let sql = 'SELECT * FROM nodes WHERE name = ? COLLATE NOCASE';
@@ -506,11 +509,11 @@ export class QueryBuilder {
           sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
           params.push(...languages);
         }
-        sql += ' LIMIT 5';
+        sql += ' LIMIT 20';
         const rows = this.db.prepare(sql).all(...params) as NodeRow[];
         for (const row of rows) {
           if (!existingIds.has(row.id)) {
-            results.push({ node: rowToNode(row), score: 0 });
+            results.push({ node: rowToNode(row), score: maxFtsScore });
             existingIds.add(row.id);
           }
         }

+ 4 - 1
src/search/query-utils.ts

@@ -20,9 +20,12 @@ export const STOP_WORDS = new Set([
   'every', 'how', 'what', 'where', 'when', 'who', 'which', 'why',
   'i', 'me', 'my', 'we', 'our', 'you', 'your', 'he', 'she', 'they',
   'show', 'give', 'tell',
-  'been', 'done', 'made', 'used', 'using', 'works', 'found',
+  'been', 'done', 'made', 'used', 'using', 'work', 'works', 'found',
   'also', 'into', 'then', 'than', 'just', 'more', 'some', 'such',
   'over', 'only', 'out', 'its', 'so', 'up', 'as', 'if',
+  'look', 'need', 'needs', 'want', 'happen', 'happens',
+  'affect', 'affected', 'break', 'breaks', 'failing',
+  'implemented', 'implement',
   // Code-specific noise (avoid filtering common symbol names like get/set/add/build/find/list)
   'code', 'file', 'files', 'function', 'method', 'class', 'type',
   'fix', 'bug', 'called',