Jelajahi Sumber

feat: Improve exact name match scoring and expand stop word filtering

Uses the maximum FTS score as the base score for supplemental exact-name matches so that nameMatchBonus can actually differentiate them during rescoring; raises the exact-match limit from 5 to 20 candidates per term; and adds common conversational terms to the stop-word list to reduce query noise.
Colby McHenry 2 bulan lalu
induk
komit
630053f3a3
2 file diubah dengan 11 penambahan dan 5 penghapusan
  1. 7 4
      src/db/queries.ts
  2. 4 1
      src/search/query-utils.ts

+ 7 - 4
src/db/queries.ts

@@ -489,11 +489,14 @@ export class QueryBuilder {
     }
 
     // Supplement: ensure exact name matches are always candidates.
-    // BM25 can bury short exact-match names (e.g. "Query") under hundreds of
-    // compound names (e.g. "QueryParserTokenManager") in large codebases,
+    // BM25 can bury short exact-match names (e.g. "getBean") under hundreds of
+    // compound names (e.g. "getBeanDescriptor") in large codebases,
     // pushing them past the FTS fetch limit before post-hoc scoring can help.
+    // Use the max BM25 score as the base so the nameMatchBonus (exact=30 vs
+    // prefix=20) actually differentiates them after rescoring.
     if (results.length > 0 && query) {
       const existingIds = new Set(results.map(r => r.node.id));
+      const maxFtsScore = Math.max(...results.map(r => r.score));
       const terms = query.split(/\s+/).filter(t => t.length >= 2);
       for (const term of terms) {
         let sql = 'SELECT * FROM nodes WHERE name = ? COLLATE NOCASE';
@@ -506,11 +509,11 @@ export class QueryBuilder {
           sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
           params.push(...languages);
         }
-        sql += ' LIMIT 5';
+        sql += ' LIMIT 20';
         const rows = this.db.prepare(sql).all(...params) as NodeRow[];
         for (const row of rows) {
           if (!existingIds.has(row.id)) {
-            results.push({ node: rowToNode(row), score: 0 });
+            results.push({ node: rowToNode(row), score: maxFtsScore });
             existingIds.add(row.id);
           }
         }

+ 4 - 1
src/search/query-utils.ts

@@ -20,9 +20,12 @@ export const STOP_WORDS = new Set([
   'every', 'how', 'what', 'where', 'when', 'who', 'which', 'why',
   'i', 'me', 'my', 'we', 'our', 'you', 'your', 'he', 'she', 'they',
   'show', 'give', 'tell',
-  'been', 'done', 'made', 'used', 'using', 'works', 'found',
+  'been', 'done', 'made', 'used', 'using', 'work', 'works', 'found',
   'also', 'into', 'then', 'than', 'just', 'more', 'some', 'such',
   'over', 'only', 'out', 'its', 'so', 'up', 'as', 'if',
+  'look', 'need', 'needs', 'want', 'happen', 'happens',
+  'affect', 'affected', 'break', 'breaks', 'failing',
+  'implemented', 'implement',
   // Code-specific noise (avoid filtering common symbol names like get/set/add/build/find/list)
   'code', 'file', 'files', 'function', 'method', 'class', 'type',
   'fix', 'bug', 'called',