Sfoglia il codice sorgente

feat: Add exact name match supplement to prevent BM25 burial in search results

Addresses cases where BM25 can bury short exact-match names (e.g. "Query") under hundreds of compound names (e.g. "QueryParserTokenManager") in large codebases, pushing them past the FTS fetch limit before post-hoc scoring can help. When the primary search returns at least one result, supplements those results with direct case-insensitive name lookups for each whitespace-separated query term of two or more characters (up to 5 rows per term, honoring the kind and language filters), so that exact matches become candidates for scoring.
Colby McHenry 2 mesi fa
parent
commit
f3a0fd402f
1 file modificato con 29 aggiunte e 0 eliminazioni
  1. 29 0
      src/db/queries.ts

+ 29 - 0
src/db/queries.ts

@@ -488,6 +488,35 @@ export class QueryBuilder {
       results = this.searchNodesLike(query, { kinds, languages, limit, offset });
     }
 
+    // Supplement: ensure exact name matches are always candidates.
+    // BM25 can bury short exact-match names (e.g. "Query") under hundreds of
+    // compound names (e.g. "QueryParserTokenManager") in large codebases,
+    // pushing them past the FTS fetch limit before post-hoc scoring can help.
+    if (results.length > 0 && query) {
+      const existingIds = new Set(results.map(r => r.node.id));
+      const terms = query.split(/\s+/).filter(t => t.length >= 2);
+      for (const term of terms) {
+        let sql = 'SELECT * FROM nodes WHERE name = ? COLLATE NOCASE';
+        const params: (string | number)[] = [term];
+        if (kinds && kinds.length > 0) {
+          sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
+          params.push(...kinds);
+        }
+        if (languages && languages.length > 0) {
+          sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
+          params.push(...languages);
+        }
+        sql += ' LIMIT 5';
+        const rows = this.db.prepare(sql).all(...params) as NodeRow[];
+        for (const row of rows) {
+          if (!existingIds.has(row.id)) {
+            results.push({ node: rowToNode(row), score: 0 });
+            existingIds.add(row.id);
+          }
+        }
+      }
+    }
+
     // Apply multi-signal scoring
     if (results.length > 0 && query) {
       results = results.map(r => ({