|
@@ -19,6 +19,7 @@ import {
|
|
|
} from '../types';
|
|
} from '../types';
|
|
|
import { safeJsonParse } from '../utils';
|
|
import { safeJsonParse } from '../utils';
|
|
|
import { kindBonus, nameMatchBonus, scorePathRelevance } from '../search/query-utils';
|
|
import { kindBonus, nameMatchBonus, scorePathRelevance } from '../search/query-utils';
|
|
|
|
|
+import { parseQuery, boundedEditDistance } from '../search/query-parser';
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
|
* Database row types (snake_case from SQLite)
|
|
* Database row types (snake_case from SQLite)
|
|
@@ -478,14 +479,51 @@ export class QueryBuilder {
|
|
|
* 3. Score results based on match quality
|
|
* 3. Score results based on match quality
|
|
|
*/
|
|
*/
|
|
|
searchNodes(query: string, options: SearchOptions = {}): SearchResult[] {
|
|
searchNodes(query: string, options: SearchOptions = {}): SearchResult[] {
|
|
|
- const { kinds, languages, limit = 100, offset = 0 } = options;
|
|
|
|
|
|
|
+ const { limit = 100, offset = 0 } = options;
|
|
|
|
|
+
|
|
|
|
|
+ // Parse field-qualified bits out of the raw query (kind:, lang:,
|
|
|
|
|
+ // path:, name:). Anything not recognised stays in `text` and goes
|
|
|
|
|
+ // to FTS unchanged. Filters compose with the SearchOptions arg —
|
|
|
|
|
+ // both are applied (intersection-style).
|
|
|
|
|
+ const parsed = parseQuery(query);
|
|
|
|
|
+ const mergedKinds =
|
|
|
|
|
+ parsed.kinds.length > 0
|
|
|
|
|
+ ? Array.from(new Set([...(options.kinds ?? []), ...parsed.kinds]))
|
|
|
|
|
+ : options.kinds;
|
|
|
|
|
+ const mergedLanguages =
|
|
|
|
|
+ parsed.languages.length > 0
|
|
|
|
|
+ ? Array.from(new Set([...(options.languages ?? []), ...parsed.languages]))
|
|
|
|
|
+ : options.languages;
|
|
|
|
|
+ const pathFilters = parsed.pathFilters;
|
|
|
|
|
+ const nameFilters = parsed.nameFilters;
|
|
|
|
|
+ // The text portion drives FTS/LIKE; if all the user typed was
|
|
|
|
|
+ // filters (`kind:function`), we still need *some* candidate set,
|
|
|
|
|
+ // so synthesise an empty-text path that returns everything matching
|
|
|
|
|
+ // the filters.
|
|
|
|
|
+ const text = parsed.text;
|
|
|
|
|
+ const kinds = mergedKinds;
|
|
|
|
|
+ const languages = mergedLanguages;
|
|
|
|
|
|
|
|
// First try FTS5 with prefix matching
|
|
// First try FTS5 with prefix matching
|
|
|
- let results = this.searchNodesFTS(query, { kinds, languages, limit, offset });
|
|
|
|
|
|
|
+ let results = text
|
|
|
|
|
+ ? this.searchNodesFTS(text, { kinds, languages, limit, offset })
|
|
|
|
|
+ // Over-fetch by 5× when running filter-only (no text). The
|
|
|
|
|
+ // post-scoring path: + name: filters can be very selective, so
|
|
|
|
|
+ // a smaller multiplier risks returning fewer than `limit`
|
|
|
|
|
+ // results despite the DB having plenty of matches.
|
|
|
|
|
+ : this.searchAllByFilters({ kinds, languages, limit: limit * 5 });
|
|
|
|
|
|
|
|
// If no FTS results, try LIKE-based substring search
|
|
// If no FTS results, try LIKE-based substring search
|
|
|
- if (results.length === 0 && query.length >= 2) {
|
|
|
|
|
- results = this.searchNodesLike(query, { kinds, languages, limit, offset });
|
|
|
|
|
|
|
+ if (results.length === 0 && text.length >= 2) {
|
|
|
|
|
+ results = this.searchNodesLike(text, { kinds, languages, limit, offset });
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Final fuzzy fallback: scan all known names and keep those within
|
|
|
|
|
+ // a tight Levenshtein distance. Only fires when both FTS and LIKE
|
|
|
|
|
+ // returned nothing AND there's a text portion long enough to be
|
|
|
|
|
+ // worth fuzzing (1-char queries would match too much).
|
|
|
|
|
+ if (results.length === 0 && text.length >= 3) {
|
|
|
|
|
+ results = this.searchNodesFuzzy(text, { kinds, languages, limit });
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Supplement: ensure exact name matches are always candidates.
|
|
// Supplement: ensure exact name matches are always candidates.
|
|
@@ -521,13 +559,14 @@ export class QueryBuilder {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Apply multi-signal scoring
|
|
// Apply multi-signal scoring
|
|
|
- if (results.length > 0 && query) {
|
|
|
|
|
|
|
+ if (results.length > 0 && (text || query)) {
|
|
|
|
|
+ const scoringQuery = text || query;
|
|
|
results = results.map(r => ({
|
|
results = results.map(r => ({
|
|
|
...r,
|
|
...r,
|
|
|
score: r.score
|
|
score: r.score
|
|
|
+ kindBonus(r.node.kind)
|
|
+ kindBonus(r.node.kind)
|
|
|
- + scorePathRelevance(r.node.filePath, query)
|
|
|
|
|
- + nameMatchBonus(r.node.name, query),
|
|
|
|
|
|
|
+ + scorePathRelevance(r.node.filePath, scoringQuery)
|
|
|
|
|
+ + nameMatchBonus(r.node.name, scoringQuery),
|
|
|
}));
|
|
}));
|
|
|
results.sort((a, b) => b.score - a.score);
|
|
results.sort((a, b) => b.score - a.score);
|
|
|
// Trim to requested limit after rescoring
|
|
// Trim to requested limit after rescoring
|
|
@@ -536,6 +575,117 @@ export class QueryBuilder {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ // Apply path: + name: filters AFTER scoring. Scoring already uses
|
|
|
|
|
+ // path/name as a soft signal; the explicit filters here are a hard
|
|
|
|
|
+ // gate. Done last so the FTS limit fetched plenty of candidates to
|
|
|
|
|
+ // narrow from.
|
|
|
|
|
+ if (pathFilters.length > 0) {
|
|
|
|
|
+ const lowered = pathFilters.map((p) => p.toLowerCase());
|
|
|
|
|
+ results = results.filter((r) => {
|
|
|
|
|
+ const fp = r.node.filePath.toLowerCase();
|
|
|
|
|
+ return lowered.some((p) => fp.includes(p));
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+ if (nameFilters.length > 0) {
|
|
|
|
|
+ const lowered = nameFilters.map((n) => n.toLowerCase());
|
|
|
|
|
+ results = results.filter((r) => {
|
|
|
|
|
+ const nm = r.node.name.toLowerCase();
|
|
|
|
|
+ return lowered.some((n) => nm.includes(n));
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return results;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * Match-everything path used when the user supplied only field
|
|
|
|
|
+ * filters (`kind:function lang:typescript`) with no text. Returns
|
|
|
|
|
+ * candidates ordered by name; the caller's filter pass narrows to
|
|
|
|
|
+ * what was asked for.
|
|
|
|
|
+ */
|
|
|
|
|
+ private searchAllByFilters(options: {
|
|
|
|
|
+ kinds?: NodeKind[];
|
|
|
|
|
+ languages?: Language[];
|
|
|
|
|
+ limit: number;
|
|
|
|
|
+ }): SearchResult[] {
|
|
|
|
|
+ const { kinds, languages, limit } = options;
|
|
|
|
|
+ let sql = 'SELECT * FROM nodes WHERE 1=1';
|
|
|
|
|
+ const params: (string | number)[] = [];
|
|
|
|
|
+ if (kinds && kinds.length > 0) {
|
|
|
|
|
+ sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
|
|
|
|
|
+ params.push(...kinds);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (languages && languages.length > 0) {
|
|
|
|
|
+ sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
|
|
|
|
|
+ params.push(...languages);
|
|
|
|
|
+ }
|
|
|
|
|
+ sql += ' ORDER BY name LIMIT ?';
|
|
|
|
|
+ params.push(limit);
|
|
|
|
|
+ const rows = this.db.prepare(sql).all(...params) as NodeRow[];
|
|
|
|
|
+ return rows.map((row) => ({ node: rowToNode(row), score: 1 }));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * Fuzzy fallback: when zero FTS/LIKE hits, try an edit-distance
|
|
|
|
|
+ * sweep over the distinct symbol-name set. Caps `maxDist` at 2 so
|
|
|
|
|
+ * `getUssr` finds `getUser` but `process` doesn't match `prosody`.
|
|
|
|
|
+ * Bounded edit distance keeps each comparison cheap; the per-query
|
|
|
|
|
+ * scan is O(distinct-name-count) which is far smaller than total
|
|
|
|
|
+ * node count on any real codebase.
|
|
|
|
|
+ */
|
|
|
|
|
+ private searchNodesFuzzy(
|
|
|
|
|
+ text: string,
|
|
|
|
|
+ options: { kinds?: NodeKind[]; languages?: Language[]; limit: number }
|
|
|
|
|
+ ): SearchResult[] {
|
|
|
|
|
+ const { kinds, languages, limit } = options;
|
|
|
|
|
+ const lowered = text.toLowerCase();
|
|
|
|
|
+ const maxDist = lowered.length <= 4 ? 1 : 2;
|
|
|
|
|
+
|
|
|
|
|
+ // Pull the distinct name list once. The set is cached on QueryBuilder
|
|
|
|
|
+ // by getAllNodeNames(); even on a 200k-node project the distinct
|
|
|
|
|
+ // name set is typically O(10k) because most names repeat. The
|
|
|
|
|
+ // candidate-cap below bounds memory regardless.
|
|
|
|
|
+ const allNames = this.getAllNodeNames();
|
|
|
|
|
+ const candidates: Array<{ name: string; dist: number }> = [];
|
|
|
|
|
+ for (const name of allNames) {
|
|
|
|
|
+ const dist = boundedEditDistance(name.toLowerCase(), lowered, maxDist);
|
|
|
|
|
+ if (dist <= maxDist) candidates.push({ name, dist });
|
|
|
|
|
+ }
|
|
|
|
|
+ candidates.sort((a, b) => a.dist - b.dist);
|
|
|
|
|
+
|
|
|
|
|
+ // Cap the per-name follow-up queries. Each survivor triggers a
|
|
|
|
|
+ // separate `SELECT * FROM nodes WHERE name = ?`; without this cap
|
|
|
|
|
+ // a project with many similar names (`getUser1`, `getUser2`...)
|
|
|
|
|
+ // could fan out far beyond `limit` queries before the inner-loop
|
|
|
|
|
+ // limit kicks in.
|
|
|
|
|
+ const FUZZY_FOLLOWUP_CAP = Math.max(limit * 2, 50);
|
|
|
|
|
+ const cappedCandidates = candidates.slice(0, FUZZY_FOLLOWUP_CAP);
|
|
|
|
|
+
|
|
|
|
|
+ const results: SearchResult[] = [];
|
|
|
|
|
+ const seen = new Set<string>();
|
|
|
|
|
+ for (const c of cappedCandidates) {
|
|
|
|
|
+ if (results.length >= limit) break;
|
|
|
|
|
+ let sql = 'SELECT * FROM nodes WHERE name = ?';
|
|
|
|
|
+ const params: (string | number)[] = [c.name];
|
|
|
|
|
+ if (kinds && kinds.length > 0) {
|
|
|
|
|
+ sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
|
|
|
|
|
+ params.push(...kinds);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (languages && languages.length > 0) {
|
|
|
|
|
+ sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
|
|
|
|
|
+ params.push(...languages);
|
|
|
|
|
+ }
|
|
|
|
|
+ sql += ' LIMIT 5';
|
|
|
|
|
+ const rows = this.db.prepare(sql).all(...params) as NodeRow[];
|
|
|
|
|
+ for (const row of rows) {
|
|
|
|
|
+ if (seen.has(row.id)) continue;
|
|
|
|
|
+ seen.add(row.id);
|
|
|
|
|
+ // Lower the score for each edit step away from the query so
|
|
|
|
|
+ // exact-match fallbacks (dist 0) outrank dist-2 typos.
|
|
|
|
|
+ results.push({ node: rowToNode(row), score: 1 / (1 + c.dist) });
|
|
|
|
|
+ if (results.length >= limit) break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
return results;
|
|
return results;
|
|
|
}
|
|
}
|
|
|
|
|
|