hace 2 meses · 0756636bde
--- a/src/search/query-utils.ts
+++ b/src/search/query-utils.ts
@@ -8,9 +8,11 @@ import * as path from 'path';
 
				 import { Node } from '../types';
			
 
				 
			
 
				 /**
			
 
				- * Common stop words to filter from search queries
			
 
				+ * Common stop words to filter from search queries.
			
 
				+ * Includes generic English + code-specific noise words.
			
 
				  */
			
 
				 export const STOP_WORDS = new Set([
			
 
				+  // English
			
 
				   'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
			
 
				   'of', 'with', 'by', 'from', 'is', 'it', 'that', 'this', 'are', 'was',
			
 
				   'be', 'has', 'had', 'have', 'do', 'does', 'did', 'will', 'would', 'could',
			
@@ -18,17 +20,41 @@ export const STOP_WORDS = new Set([
 
				   'every', 'how', 'what', 'where', 'when', 'who', 'which', 'why',
			
 
				   'i', 'me', 'my', 'we', 'our', 'you', 'your', 'he', 'she', 'they',
			
 
				   'find', 'show', 'get', 'list', 'give', 'tell',
			
 
				+  'been', 'done', 'made', 'used', 'using', 'work', 'works', 'found',
			
 
				+  'also', 'into', 'then', 'than', 'just', 'more', 'some', 'such',
			
 
				+  'over', 'only', 'new', 'out', 'its', 'so', 'up', 'as', 'if',
			
 
				+  // Code-specific noise
			
 
				+  'code', 'file', 'files', 'function', 'method', 'class', 'type',
			
 
				+  'build', 'run', 'test', 'fix', 'bug', 'call', 'called', 'set', 'add',
			
 
				 ]);
			
 
				 
			
 
				 /**
			
 
				- * Extract meaningful search terms from a natural language query
			
 
				+ * Extract meaningful search terms from a natural language query.
			
 
				+ * Splits camelCase, PascalCase, snake_case, SCREAMING_SNAKE, and dot.notation
			
 
				+ * into individual tokens before filtering.
			
 
				+ *
			
 
				+ * Matching is ASCII-only: every character outside a-z, A-Z, 0-9 acts as a
			
 
				+ * separator. Tokens are lowercased, and those shorter than three characters
			
 
				+ * or listed in STOP_WORDS are dropped.
			
 
				+ *
			
 
				+ * @param query - Raw query text to tokenise.
			
 
				+ * @returns Unique lowercase tokens, in order of first appearance.
			
 
				  */
			
 
				 export function extractSearchTerms(query: string): string[] {
			
 
				-  return query
			
 
				-    .toLowerCase()
			
 
				-    .replace(/[^\w\s-]/g, ' ')
			
 
				-    .split(/\s+/)
			
 
				-    .filter(term => term.length > 1 && !STOP_WORDS.has(term));
			
 
				+  // A Set collapses repeated terms; spreading it at the end keeps first-seen order.
			
 
				+  const tokens = new Set<string>();
			
 
				+
			
 
				+  // Split camelCase / PascalCase: "getUserName" → "get User Name"
			
 
				+  // The second pass separates an acronym run from the capitalised word that
			
 
				+  // follows it: "XMLParser" → "XML Parser".
			
 
				+  // NOTE(review): the first pass only fires after a lowercase letter, so a
			
 
				+  // digit-to-uppercase boundary ("base64Encode") is left unsplit — confirm
			
 
				+  // whether that is intended.
			
 
				+  const camelSplit = query
			
 
				+    .replace(/([a-z])([A-Z])/g, '$1 $2')
			
 
				+    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
			
 
				+
			
 
				+  // Replace underscores and dots with spaces (snake_case, dot.notation)
			
 
				+  const normalised = camelSplit.replace(/[_.]+/g, ' ');
			
 
				+
			
 
				+  // Split on any non-alphanumeric character
			
 
				+  // ASCII-only, so hyphens and non-ASCII letters also act as separators;
			
 
				+  // .filter(Boolean) discards the empty strings a leading or trailing
			
 
				+  // separator leaves behind.
			
 
				+  const words = normalised.split(/[^a-zA-Z0-9]+/).filter(Boolean);
			
 
				+
			
 
				+  for (const word of words) {
			
 
				+    const lower = word.toLowerCase();
			
 
				+    // NOTE(review): the replaced implementation kept two-character terms
			
 
				+    // (length > 1); this cut-off drops them — confirm that is intended.
			
 
				+    if (lower.length < 3) continue;
			
 
				+    if (STOP_WORDS.has(lower)) continue;
			
 
				+    tokens.add(lower);
			
 
				+  }
			
 
				+
			
 
				+  return [...tokens];
			
 
				 }
			
 
				 
			
 
				 /**