فهرست منبع

Resolve merge conflicts with main

- extraction/index.ts: use picomatch with static import (replacing
  dynamic require) and keep normalizePath for other call sites
- utils.ts: keep normalizePath from main, take PR's PID-based FileLock
Colby McHenry 4 ماه پیش
والد
کامیت
98034cc76e
8 فایلهای تغییر یافته به همراه 632 افزوده شده و 62 حذف شده
  1. 190 23
      __tests__/extraction.test.ts
  2. 135 0
      __tests__/search.test.ts
  3. 4 0
      src/config.ts
  4. 13 1
      src/db/queries.ts
  5. 147 33
      src/extraction/index.ts
  6. 46 5
      src/extraction/tree-sitter.ts
  7. 89 0
      src/search/query-utils.ts
  8. 8 0
      src/utils.ts

+ 190 - 23
__tests__/extraction.test.ts

@@ -9,8 +9,10 @@ import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
 import { CodeGraph } from '../src';
-import { extractFromSource } from '../src/extraction';
+import { extractFromSource, scanDirectory, shouldIncludeFile } from '../src/extraction';
 import { detectLanguage, isLanguageSupported, getSupportedLanguages } from '../src/extraction/grammars';
+import { normalizePath } from '../src/utils';
+import { DEFAULT_CONFIG } from '../src/types';
 
 // Create a temporary directory for each test
 function createTempDir(): string {
@@ -127,14 +129,19 @@ export function processPayment(amount: number): Promise<Receipt> {
 `;
     const result = extractFromSource('payment.ts', code);
 
-    expect(result.nodes).toHaveLength(1);
-    expect(result.nodes[0]).toMatchObject({
+    // File node + function node
+    const fileNode = result.nodes.find((n) => n.kind === 'file');
+    expect(fileNode).toBeDefined();
+    expect(fileNode?.name).toBe('payment.ts');
+
+    const funcNode = result.nodes.find((n) => n.kind === 'function');
+    expect(funcNode).toMatchObject({
       kind: 'function',
       name: 'processPayment',
       language: 'typescript',
       isExported: true,
     });
-    expect(result.nodes[0]?.signature).toContain('amount: number');
+    expect(funcNode?.signature).toContain('amount: number');
   });
 
   it('should extract class declarations', () => {
@@ -175,8 +182,11 @@ export interface User {
 `;
     const result = extractFromSource('types.ts', code);
 
-    expect(result.nodes).toHaveLength(1);
-    expect(result.nodes[0]).toMatchObject({
+    const fileNode = result.nodes.find((n) => n.kind === 'file');
+    expect(fileNode).toBeDefined();
+
+    const ifaceNode = result.nodes.find((n) => n.kind === 'interface');
+    expect(ifaceNode).toMatchObject({
       kind: 'interface',
       name: 'User',
       isExported: true,
@@ -207,8 +217,9 @@ export const useAuth = (): AuthContextValue => {
 `;
     const result = extractFromSource('hooks.ts', code);
 
-    expect(result.nodes).toHaveLength(1);
-    expect(result.nodes[0]).toMatchObject({
+    const funcNode = result.nodes.find((n) => n.kind === 'function' && n.name === 'useAuth');
+    expect(funcNode).toBeDefined();
+    expect(funcNode).toMatchObject({
       kind: 'function',
       name: 'useAuth',
       isExported: true,
@@ -223,8 +234,9 @@ export const processData = function(input: string): string {
 `;
     const result = extractFromSource('utils.ts', code);
 
-    expect(result.nodes).toHaveLength(1);
-    expect(result.nodes[0]).toMatchObject({
+    const funcNode = result.nodes.find((n) => n.kind === 'function' && n.name === 'processData');
+    expect(funcNode).toBeDefined();
+    expect(funcNode).toMatchObject({
       kind: 'function',
       name: 'processData',
       isExported: true,
@@ -286,8 +298,9 @@ export const fetchData = async () => {
 `;
     const result = extractFromSource('api.js', code);
 
-    expect(result.nodes).toHaveLength(1);
-    expect(result.nodes[0]).toMatchObject({
+    const funcNode = result.nodes.find((n) => n.kind === 'function' && n.name === 'fetchData');
+    expect(funcNode).toBeDefined();
+    expect(funcNode).toMatchObject({
       kind: 'function',
       name: 'fetchData',
       isExported: true,
@@ -306,8 +319,8 @@ export type AuthContextValue = {
 `;
     const result = extractFromSource('types.ts', code);
 
-    expect(result.nodes).toHaveLength(1);
-    expect(result.nodes[0]).toMatchObject({
+    const typeNode = result.nodes.find((n) => n.kind === 'type_alias');
+    expect(typeNode).toMatchObject({
       kind: 'type_alias',
       name: 'AuthContextValue',
       isExported: true,
@@ -323,8 +336,8 @@ type InternalState = {
 `;
     const result = extractFromSource('internal.ts', code);
 
-    expect(result.nodes).toHaveLength(1);
-    expect(result.nodes[0]).toMatchObject({
+    const typeNode = result.nodes.find((n) => n.kind === 'type_alias');
+    expect(typeNode).toMatchObject({
       kind: 'type_alias',
       name: 'InternalState',
       isExported: false,
@@ -415,7 +428,7 @@ export const useAuth = () => {
     expect(varNodes).toHaveLength(0);
   });
 
-  it('should not extract non-exported const as exported variable', () => {
+  it('should extract non-exported const as non-exported variable', () => {
     const code = `
 const internalConfig = {
   debug: true,
@@ -423,10 +436,10 @@ const internalConfig = {
 `;
     const result = extractFromSource('internal.ts', code);
 
-    // Non-exported const should NOT create a variable node
-    // (only export_statement triggers extractExportedVariables)
-    const varNodes = result.nodes.filter((n) => n.kind === 'variable' && n.name === 'internalConfig');
-    expect(varNodes).toHaveLength(0);
+    // Non-exported const at file level should be extracted as a constant (not exported)
+    const varNodes = result.nodes.filter((n) => (n.kind === 'variable' || n.kind === 'constant') && n.name === 'internalConfig');
+    expect(varNodes).toHaveLength(1);
+    expect(varNodes[0]?.isExported).toBeFalsy();
   });
 
   it('should extract Zod schema exports', () => {
@@ -463,6 +476,54 @@ export const authMachine = createMachine({
   });
 });
 
+describe('File Node Extraction', () => {
+  it('should create a file-kind node for each parsed file', () => {
+    const code = `
+export function greet(name: string): string {
+  return "Hello " + name;
+}
+`;
+    const result = extractFromSource('greeter.ts', code);
+
+    const fileNode = result.nodes.find((n) => n.kind === 'file');
+    expect(fileNode).toBeDefined();
+    expect(fileNode?.name).toBe('greeter.ts');
+    expect(fileNode?.filePath).toBe('greeter.ts');
+    expect(fileNode?.language).toBe('typescript');
+    expect(fileNode?.startLine).toBe(1);
+  });
+
+  it('should create file nodes for Python files', () => {
+    const code = `
+def main():
+    pass
+`;
+    const result = extractFromSource('main.py', code);
+
+    const fileNode = result.nodes.find((n) => n.kind === 'file');
+    expect(fileNode).toBeDefined();
+    expect(fileNode?.name).toBe('main.py');
+    expect(fileNode?.language).toBe('python');
+  });
+
+  it('should create containment edges from file node to top-level declarations', () => {
+    const code = `
+export function foo() {}
+export function bar() {}
+`;
+    const result = extractFromSource('fns.ts', code);
+
+    const fileNode = result.nodes.find((n) => n.kind === 'file');
+    expect(fileNode).toBeDefined();
+
+    // Expect at least two 'contains' edges whose source is the file node (one per function)
+    const containsEdges = result.edges.filter(
+      (e) => e.source === fileNode?.id && e.kind === 'contains'
+    );
+    expect(containsEdges.length).toBeGreaterThanOrEqual(2);
+  });
+});
+
 describe('Python Extraction', () => {
   it('should extract function definitions', () => {
     const code = `
@@ -473,8 +534,11 @@ def calculate_total(items: list, tax_rate: float) -> float:
 `;
     const result = extractFromSource('calc.py', code);
 
-    expect(result.nodes).toHaveLength(1);
-    expect(result.nodes[0]).toMatchObject({
+    const fileNode = result.nodes.find((n) => n.kind === 'file');
+    expect(fileNode).toBeDefined();
+
+    const funcNode = result.nodes.find((n) => n.kind === 'function');
+    expect(funcNode).toMatchObject({
       kind: 'function',
       name: 'calculate_total',
       language: 'python',
@@ -1880,3 +1944,106 @@ export function multiply(a: number, b: number): number {
     cg.close();
   });
 });
+
+describe('Path Normalization', () => {
+  it('should convert backslashes to forward slashes', () => {
+    expect(normalizePath('gui\\node_modules\\foo')).toBe('gui/node_modules/foo');
+    expect(normalizePath('src\\components\\Button.tsx')).toBe('src/components/Button.tsx');
+  });
+
+  it('should leave forward-slash paths unchanged', () => {
+    expect(normalizePath('src/components/Button.tsx')).toBe('src/components/Button.tsx');
+  });
+
+  it('should handle empty string', () => {
+    expect(normalizePath('')).toBe('');
+  });
+});
+
+describe('Directory Exclusion', () => {
+  let tempDir: string;
+
+  beforeEach(() => {
+    tempDir = createTempDir();
+  });
+
+  afterEach(() => {
+    cleanupTempDir(tempDir);
+  });
+
+  it('should exclude node_modules directories', () => {
+    // Create structure: src/index.ts + node_modules/pkg/index.js
+    const srcDir = path.join(tempDir, 'src');
+    const nmDir = path.join(tempDir, 'node_modules', 'pkg');
+    fs.mkdirSync(srcDir, { recursive: true });
+    fs.mkdirSync(nmDir, { recursive: true });
+    fs.writeFileSync(path.join(srcDir, 'index.ts'), 'export const x = 1;');
+    fs.writeFileSync(path.join(nmDir, 'index.js'), 'module.exports = {};');
+
+    const config = { ...DEFAULT_CONFIG, rootDir: tempDir };
+    const files = scanDirectory(tempDir, config);
+
+    expect(files).toContain('src/index.ts');
+    expect(files.every((f) => !f.includes('node_modules'))).toBe(true);
+  });
+
+  it('should exclude nested node_modules directories', () => {
+    // Create structure: packages/app/src/index.ts + packages/app/node_modules/pkg/index.js
+    const srcDir = path.join(tempDir, 'packages', 'app', 'src');
+    const nmDir = path.join(tempDir, 'packages', 'app', 'node_modules', 'pkg');
+    fs.mkdirSync(srcDir, { recursive: true });
+    fs.mkdirSync(nmDir, { recursive: true });
+    fs.writeFileSync(path.join(srcDir, 'index.ts'), 'export const x = 1;');
+    fs.writeFileSync(path.join(nmDir, 'index.js'), 'module.exports = {};');
+
+    const config = { ...DEFAULT_CONFIG, rootDir: tempDir };
+    const files = scanDirectory(tempDir, config);
+
+    expect(files).toContain('packages/app/src/index.ts');
+    expect(files.every((f) => !f.includes('node_modules'))).toBe(true);
+  });
+
+  it('should exclude .git directories', () => {
+    const srcDir = path.join(tempDir, 'src');
+    const gitDir = path.join(tempDir, '.git', 'objects');
+    fs.mkdirSync(srcDir, { recursive: true });
+    fs.mkdirSync(gitDir, { recursive: true });
+    fs.writeFileSync(path.join(srcDir, 'index.ts'), 'export const x = 1;');
+    fs.writeFileSync(path.join(gitDir, 'pack.ts'), 'export const y = 2;');
+
+    const config = { ...DEFAULT_CONFIG, rootDir: tempDir };
+    const files = scanDirectory(tempDir, config);
+
+    expect(files).toContain('src/index.ts');
+    expect(files.every((f) => !f.includes('.git'))).toBe(true);
+  });
+
+  it('should return forward-slash paths on all platforms', () => {
+    const srcDir = path.join(tempDir, 'src', 'components');
+    fs.mkdirSync(srcDir, { recursive: true });
+    fs.writeFileSync(path.join(srcDir, 'Button.tsx'), 'export function Button() {}');
+
+    const config = { ...DEFAULT_CONFIG, rootDir: tempDir };
+    const files = scanDirectory(tempDir, config);
+
+    expect(files.length).toBe(1);
+    expect(files[0]).toBe('src/components/Button.tsx');
+    expect(files[0]).not.toContain('\\');
+  });
+
+  it('should respect .codegraphignore marker', () => {
+    const srcDir = path.join(tempDir, 'src');
+    const vendorDir = path.join(tempDir, 'vendor');
+    fs.mkdirSync(srcDir, { recursive: true });
+    fs.mkdirSync(vendorDir, { recursive: true });
+    fs.writeFileSync(path.join(srcDir, 'index.ts'), 'export const x = 1;');
+    fs.writeFileSync(path.join(vendorDir, 'lib.ts'), 'export const y = 2;');
+    fs.writeFileSync(path.join(vendorDir, '.codegraphignore'), '');
+
+    const config = { ...DEFAULT_CONFIG, rootDir: tempDir };
+    const files = scanDirectory(tempDir, config);
+
+    expect(files).toContain('src/index.ts');
+    expect(files.every((f) => !f.includes('vendor'))).toBe(true);
+  });
+});

+ 135 - 0
__tests__/search.test.ts

@@ -0,0 +1,135 @@
+/**
+ * Search Query Utilities Tests
+ *
+ * Tests search-term extraction, path relevance scoring, kind bonuses, and the stop-word list.
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  extractSearchTerms,
+  scorePathRelevance,
+  kindBonus,
+  STOP_WORDS,
+} from '../src/search/query-utils';
+
+describe('Search Query Utilities', () => {
+  describe('extractSearchTerms', () => {
+    it('should extract meaningful terms from a query', () => {
+      const terms = extractSearchTerms('find the login handler');
+      expect(terms).toContain('login');
+      expect(terms).toContain('handler');
+      // 'find' and 'the' are stop words
+      expect(terms).not.toContain('find');
+      expect(terms).not.toContain('the');
+    });
+
+    it('should filter stop words', () => {
+      const terms = extractSearchTerms('how does the authentication work');
+      expect(terms).not.toContain('how');
+      expect(terms).not.toContain('does');
+      expect(terms).not.toContain('the');
+      expect(terms).toContain('authentication');
+      expect(terms).toContain('work');
+    });
+
+    it('should handle camelCase by lowercasing', () => {
+      const terms = extractSearchTerms('UserService');
+      expect(terms).toContain('userservice');
+    });
+
+    it('should strip punctuation', () => {
+      const terms = extractSearchTerms('payment.process()');
+      expect(terms).toContain('payment');
+      expect(terms).toContain('process');
+    });
+
+    it('should return empty for all stop words', () => {
+      const terms = extractSearchTerms('how do I get the');
+      expect(terms).toHaveLength(0);
+    });
+
+    it('should filter single-character terms', () => {
+      const terms = extractSearchTerms('a b c auth');
+      expect(terms).toEqual(['auth']);
+    });
+  });
+
+  describe('scorePathRelevance', () => {
+    it('should score filename matches highest', () => {
+      const score = scorePathRelevance('src/auth/login.ts', 'login');
+      expect(score).toBeGreaterThanOrEqual(10);
+    });
+
+    it('should score directory matches', () => {
+      const score = scorePathRelevance('src/auth/index.ts', 'auth');
+      expect(score).toBeGreaterThanOrEqual(5);
+    });
+
+    it('should return 0 for unrelated paths', () => {
+      const score = scorePathRelevance('src/utils/format.ts', 'payment');
+      expect(score).toBe(0);
+    });
+
+    it('should accumulate scores for multiple matching terms', () => {
+      const score = scorePathRelevance('src/auth/login.ts', 'auth login');
+      // Both 'auth' (dir match) and 'login' (filename match)
+      expect(score).toBeGreaterThanOrEqual(15);
+    });
+
+    it('should return 0 for empty query terms', () => {
+      const score = scorePathRelevance('src/auth/login.ts', 'the a an');
+      expect(score).toBe(0);
+    });
+  });
+
+  describe('kindBonus', () => {
+    it('should give functions and methods highest bonus', () => {
+      expect(kindBonus('function')).toBe(10);
+      expect(kindBonus('method')).toBe(10);
+    });
+
+    it('should rank functions > classes > variables > imports', () => {
+      expect(kindBonus('function')).toBeGreaterThan(kindBonus('class'));
+      expect(kindBonus('class')).toBeGreaterThan(kindBonus('variable'));
+      expect(kindBonus('variable')).toBeGreaterThan(kindBonus('import'));
+    });
+
+    it('should give routes high priority', () => {
+      expect(kindBonus('route')).toBeGreaterThanOrEqual(9);
+    });
+
+    it('should give components high priority', () => {
+      expect(kindBonus('component')).toBeGreaterThanOrEqual(8);
+    });
+
+    it('should return 0 for parameter and file kinds', () => {
+      expect(kindBonus('parameter')).toBe(0);
+      expect(kindBonus('file')).toBe(0);
+    });
+
+    it('should return 0 for unknown kinds', () => {
+      expect(kindBonus('unknown_kind' as any)).toBe(0);
+    });
+  });
+
+  describe('STOP_WORDS', () => {
+    it('should contain common English stop words', () => {
+      expect(STOP_WORDS.has('the')).toBe(true);
+      expect(STOP_WORDS.has('and')).toBe(true);
+      expect(STOP_WORDS.has('or')).toBe(true);
+    });
+
+    it('should contain action verbs used in queries', () => {
+      expect(STOP_WORDS.has('find')).toBe(true);
+      expect(STOP_WORDS.has('show')).toBe(true);
+      expect(STOP_WORDS.has('get')).toBe(true);
+      expect(STOP_WORDS.has('list')).toBe(true);
+    });
+
+    it('should not contain technical terms', () => {
+      expect(STOP_WORDS.has('function')).toBe(false);
+      expect(STOP_WORDS.has('class')).toBe(false);
+      expect(STOP_WORDS.has('auth')).toBe(false);
+    });
+  });
+});

+ 4 - 0
src/config.ts

@@ -8,6 +8,7 @@ import * as fs from 'fs';
 import * as path from 'path';
 import picomatch from 'picomatch';
 import { CodeGraphConfig, DEFAULT_CONFIG, Language, NodeKind } from './types';
+import { normalizePath } from './utils';
 
 /**
  * Configuration filename
@@ -269,6 +270,9 @@ export function addCustomPattern(
  * Check if a file path matches the include/exclude patterns
  */
 export function shouldIncludeFile(filePath: string, config: CodeGraphConfig): boolean {
+  // Normalize to forward slashes so Windows backslash paths match glob patterns
+  filePath = normalizePath(filePath);
+
   // Simple glob matching (for now, just check if any pattern matches)
   // A full implementation would use a proper glob library
 

+ 13 - 1
src/db/queries.ts

@@ -18,6 +18,7 @@ import {
   SearchResult,
 } from '../types';
 import { safeJsonParse } from '../utils';
+import { kindBonus, scorePathRelevance } from '../search/query-utils';
 
 /**
  * Database row types (snake_case from SQLite)
@@ -451,6 +452,15 @@ export class QueryBuilder {
       results = this.searchNodesLike(query, { kinds, languages, limit, offset });
     }
 
+    // Apply multi-signal scoring
+    if (results.length > 0 && query) {
+      results = results.map(r => ({
+        ...r,
+        score: r.score + kindBonus(r.node.kind) + scorePathRelevance(r.node.filePath, query),
+      }));
+      results.sort((a, b) => b.score - a.score);
+    }
+
     return results;
   }
 
@@ -463,9 +473,11 @@ export class QueryBuilder {
     // Add prefix wildcard for better matching (e.g., "auth" matches "AuthService", "authenticate")
     // Escape special FTS5 characters and add prefix wildcard
     const ftsQuery = query
-      .replace(/['"*()]/g, '') // Remove special chars
+      .replace(/['"*():^]/g, '') // Remove FTS5 special chars
       .split(/\s+/)
       .filter(term => term.length > 0)
+      // Strip FTS5 boolean operators to prevent query manipulation
+      .filter(term => !/^(AND|OR|NOT|NEAR)$/i.test(term))
       .map(term => `"${term}"*`) // Prefix match each term
       .join(' OR ');
 

+ 147 - 33
src/extraction/index.ts

@@ -8,6 +8,7 @@ import * as fs from 'fs';
 import * as fsp from 'fs/promises';
 import * as path from 'path';
 import * as crypto from 'crypto';
+import { execFileSync } from 'child_process';
 import {
   Language,
   FileRecord,
@@ -18,9 +19,16 @@ import {
 import { QueryBuilder } from '../db/queries';
 import { extractFromSource } from './tree-sitter';
 import { detectLanguage, isLanguageSupported } from './grammars';
-import { logDebug } from '../errors';
+import { logDebug, logWarn } from '../errors';
 import { captureException } from '../sentry';
-import { validatePathWithinRoot } from '../utils';
+import { validatePathWithinRoot, normalizePath } from '../utils';
+import picomatch from 'picomatch';
+
+/**
+ * Number of files to read in parallel during indexing.
+ * Reads within one batch are issued concurrently; parsing and storing then run sequentially.
+ */
+const FILE_IO_BATCH_SIZE = 10;
 
 /**
  * Progress callback for indexing operations
@@ -68,7 +76,7 @@ export function hashContent(content: string): string {
  * Check if a path matches any glob pattern (simplified)
  */
 function matchesGlob(filePath: string, pattern: string): boolean {
-  const picomatch = require('picomatch');
+  filePath = normalizePath(filePath);
   return picomatch.isMatch(filePath, pattern, { dot: true });
 }
 
@@ -96,6 +104,31 @@ export function shouldIncludeFile(
   return false;
 }
 
+/**
+ * List directories git reports as untracked and ignored (git ls-files -oi --exclude-standard --directory).
+ * Only output lines ending in '/' are kept, stored with forward slashes and without the trailing slash.
+ * Gracefully returns an empty Set on any failure (execFileSync throwing, including the 10s timeout).
+ */
+function getGitIgnoredDirectories(rootDir: string): Set<string> {
+  try {
+    const output = execFileSync(
+      'git',
+      ['ls-files', '-oi', '--exclude-standard', '--directory'],
+      { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] }
+    );
+    const dirs = new Set<string>();
+    for (const line of output.split('\n')) {
+      const trimmed = line.trim();
+      if (trimmed.endsWith('/')) {
+        dirs.add(normalizePath(trimmed.slice(0, -1)));
+      }
+    }
+    return dirs;
+  } catch {
+    return new Set<string>();
+  }
+}
+
 /**
  * Marker file name that indicates a directory (and all children) should be skipped
  */
@@ -111,22 +144,26 @@ export function scanDirectory(
 ): string[] {
   const files: string[] = [];
   let count = 0;
-  const visitedRealPaths = new Set<string>(); // Symlink cycle detection
+  // Track visited real paths to detect symlink cycles
+  const visitedDirs = new Set<string>();
+  const gitIgnoredDirs = getGitIgnoredDirectories(rootDir);
 
   function walk(dir: string): void {
-    // Symlink cycle detection: resolve real path and skip if already visited
+    // Resolve real path to detect symlink cycles
+    let realDir: string;
     try {
-      const realDir = fs.realpathSync(dir);
-      if (visitedRealPaths.has(realDir)) {
-        logDebug('Skipping directory to prevent symlink cycle', { dir, realDir });
-        return;
-      }
-      visitedRealPaths.add(realDir);
+      realDir = fs.realpathSync(dir);
     } catch {
-      // If realpath fails, skip this directory
+      logDebug('Skipping unresolvable directory', { dir });
       return;
     }
 
+    if (visitedDirs.has(realDir)) {
+      logDebug('Skipping already-visited directory (symlink cycle)', { dir, realDir });
+      return;
+    }
+    visitedDirs.add(realDir);
+
     // Check for .codegraphignore marker file - skip entire directory tree if present
     const ignoreMarker = path.join(dir, CODEGRAPH_IGNORE_MARKER);
     if (fs.existsSync(ignoreMarker)) {
@@ -145,7 +182,7 @@ export function scanDirectory(
 
     for (const entry of entries) {
       const fullPath = path.join(dir, entry.name);
-      const relativePath = path.relative(rootDir, fullPath);
+      const relativePath = normalizePath(path.relative(rootDir, fullPath));
 
       // Follow symlinked directories, but skip symlinked files to non-project targets
       if (entry.isSymbolicLink()) {
@@ -153,6 +190,10 @@ export function scanDirectory(
           const realTarget = fs.realpathSync(fullPath);
           const stat = fs.statSync(realTarget);
           if (stat.isDirectory()) {
+            // Check gitignore first (fast O(1) lookup)
+            if (gitIgnoredDirs.has(relativePath)) {
+              continue;
+            }
             // Check exclusion, then recurse (cycle detection handles the rest)
             const dirPattern = relativePath + '/';
             let excluded = false;
@@ -181,6 +222,10 @@ export function scanDirectory(
       }
 
       if (entry.isDirectory()) {
+        // Check gitignore first (fast O(1) lookup)
+        if (gitIgnoredDirs.has(relativePath)) {
+          continue;
+        }
         // Check if directory should be excluded
         const dirPattern = relativePath + '/';
         let excluded = false;
@@ -265,10 +310,11 @@ export class ExtractionOrchestrator {
       };
     }
 
-    // Phase 2: Parse files
+    // Phase 2: Parse files (read in parallel batches, parse/store sequentially)
     const total = files.length;
+    let processed = 0;
 
-    for (let i = 0; i < files.length; i++) {
+    for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
       if (signal?.aborted) {
         return {
           success: false,
@@ -281,26 +327,69 @@ export class ExtractionOrchestrator {
         };
       }
 
-      const filePath = files[i]!;
-      onProgress?.({
-        phase: 'parsing',
-        current: i + 1,
-        total,
-        currentFile: filePath,
-      });
+      const batch = files.slice(i, i + FILE_IO_BATCH_SIZE);
 
-      const result = await this.indexFile(filePath);
+      // Read files in parallel (with path validation before any I/O)
+      const fileContents = await Promise.all(
+        batch.map(async (fp) => {
+          try {
+            const fullPath = validatePathWithinRoot(this.rootDir, fp);
+            if (!fullPath) {
+              logWarn('Path traversal blocked in batch reader', { filePath: fp });
+              return { filePath: fp, content: null as string | null, stats: null as fs.Stats | null, error: new Error('Path traversal blocked') };
+            }
+            const content = await fsp.readFile(fullPath, 'utf-8');
+            const stats = await fsp.stat(fullPath);
+            return { filePath: fp, content, stats, error: null as Error | null };
+          } catch (err) {
+            return { filePath: fp, content: null as string | null, stats: null as fs.Stats | null, error: err as Error };
+          }
+        })
+      );
+
+      // Parse and store sequentially
+      for (const { filePath, content, stats, error } of fileContents) {
+        if (signal?.aborted) {
+          return {
+            success: false,
+            filesIndexed,
+            filesSkipped,
+            nodesCreated: totalNodes,
+            edgesCreated: totalEdges,
+            errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
+            durationMs: Date.now() - startTime,
+          };
+        }
 
-      if (result.errors.length > 0) {
-        errors.push(...result.errors);
-      }
+        processed++;
+        onProgress?.({
+          phase: 'parsing',
+          current: processed,
+          total,
+          currentFile: filePath,
+        });
 
-      if (result.nodes.length > 0) {
-        filesIndexed++;
-        totalNodes += result.nodes.length;
-        totalEdges += result.edges.length;
-      } else if (result.errors.length === 0) {
-        filesSkipped++;
+        if (error || content === null || stats === null) {
+          errors.push({
+            message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
+            severity: 'error',
+          });
+          continue;
+        }
+
+        const result = await this.indexFileWithContent(filePath, content, stats);
+
+        if (result.errors.length > 0) {
+          errors.push(...result.errors);
+        }
+
+        if (result.nodes.length > 0) {
+          filesIndexed++;
+          totalNodes += result.nodes.length;
+          totalEdges += result.edges.length;
+        } else if (result.errors.length === 0) {
+          filesSkipped++;
+        }
       }
     }
 
@@ -378,7 +467,7 @@ export class ExtractionOrchestrator {
       };
     }
 
-    // Check file exists and is readable
+    // Read file content and stats
     let content: string;
     let stats: fs.Stats;
     try {
@@ -400,6 +489,31 @@ export class ExtractionOrchestrator {
       };
     }
 
+    return this.indexFileWithContent(relativePath, content, stats);
+  }
+
+  /**
+   * Index a single file with pre-read content and stats.
+   * Used by the parallel batch reader to avoid redundant file I/O.
+   */
+  async indexFileWithContent(
+    relativePath: string,
+    content: string,
+    stats: fs.Stats
+  ): Promise<ExtractionResult> {
+    // Prevent path traversal
+    const fullPath = validatePathWithinRoot(this.rootDir, relativePath);
+    if (!fullPath) {
+      logWarn('Path traversal blocked in indexFileWithContent', { relativePath });
+      return {
+        nodes: [],
+        edges: [],
+        unresolvedReferences: [],
+        errors: [{ message: 'Path traversal blocked', severity: 'error' }],
+        durationMs: 0,
+      };
+    }
+
     // Check file size
     if (stats.size > this.config.maxFileSize) {
       return {

+ 46 - 5
src/extraction/tree-sitter.ts

@@ -6,6 +6,7 @@
 
 import { SyntaxNode, Tree } from 'tree-sitter';
 import * as crypto from 'crypto';
+import * as path from 'path';
 import {
   Language,
   Node,
@@ -875,7 +876,28 @@ export class TreeSitterExtractor {
 
     try {
       this.tree = parser.parse(this.source);
+
+      // Create file node representing the source file
+      const fileNode: Node = {
+        id: `file:${this.filePath}`,
+        kind: 'file',
+        name: path.basename(this.filePath),
+        qualifiedName: this.filePath,
+        filePath: this.filePath,
+        language: this.language,
+        startLine: 1,
+        endLine: this.source.split('\n').length,
+        startColumn: 0,
+        endColumn: 0,
+        isExported: false,
+        updatedAt: Date.now(),
+      };
+      this.nodes.push(fileNode);
+
+      // Push file node onto stack so top-level declarations get contains edges
+      this.nodeStack.push(fileNode.id);
       this.visitNode(this.tree.rootNode);
+      this.nodeStack.pop();
     } catch (error) {
       captureException(error, { operation: 'tree-sitter-parse', filePath: this.filePath, language: this.language });
       this.errors.push({
@@ -905,7 +927,7 @@ export class TreeSitterExtractor {
     // Check for function declarations
     // For Python/Ruby, function_definition inside a class should be treated as method
     if (this.extractor.functionTypes.includes(nodeType)) {
-      if (this.nodeStack.length > 0 && this.extractor.methodTypes.includes(nodeType)) {
+      if (this.isInsideClassLikeNode() && this.extractor.methodTypes.includes(nodeType)) {
         // Inside a class - treat as method
         this.extractMethod(node);
         skipChildren = true; // extractMethod visits children via visitFunctionBody
@@ -958,7 +980,7 @@ export class TreeSitterExtractor {
     }
     // Check for variable declarations (const, let, var, etc.)
     // Only extract top-level variables (not inside functions/methods)
-    else if (this.extractor.variableTypes.includes(nodeType) && this.nodeStack.length === 0) {
+    else if (this.extractor.variableTypes.includes(nodeType) && !this.isInsideClassLikeNode()) {
       this.extractVariable(node);
       skipChildren = true; // extractVariable handles children
     }
@@ -1060,6 +1082,25 @@ export class TreeSitterExtractor {
     return false;
   }
 
+  /**
+   * Check whether the node on top of the node stack is class-like
+   * (class, struct, interface, trait, or enum). File nodes do not count as class-like.
+   */
+  private isInsideClassLikeNode(): boolean {
+    if (this.nodeStack.length === 0) return false;
+    const parentId = this.nodeStack[this.nodeStack.length - 1];
+    if (!parentId) return false;
+    const parentNode = this.nodes.find((n) => n.id === parentId);
+    if (!parentNode) return false;
+    return (
+      parentNode.kind === 'class' ||
+      parentNode.kind === 'struct' ||
+      parentNode.kind === 'interface' ||
+      parentNode.kind === 'trait' ||
+      parentNode.kind === 'enum'
+    );
+  }
+
   /**
    * Extract a function
    */
@@ -1160,10 +1201,10 @@ export class TreeSitterExtractor {
   private extractMethod(node: SyntaxNode): void {
     if (!this.extractor) return;
 
-    // For most languages, only extract as method if inside a class
+    // For most languages, only extract as method if inside a class-like node
     // But Go methods are top-level with a receiver, so always treat them as methods
-    if (this.nodeStack.length === 0 && this.language !== 'go') {
-      // Top-level and not Go, treat as function
+    if (!this.isInsideClassLikeNode() && this.language !== 'go') {
+      // Not inside a class-like node and not Go, treat as function
       this.extractFunction(node);
       return;
     }

+ 89 - 0
src/search/query-utils.ts

@@ -0,0 +1,89 @@
+/**
+ * Search Query Utilities
+ *
+ * Shared module for search term extraction and scoring.
+ */
+
+import * as path from 'path';
+import { Node } from '../types';
+
+/**
+ * Lowercase English stop words plus query verbs ('find', 'show', 'get', ...) to filter from search queries
+ */
+export const STOP_WORDS = new Set([
+  'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
+  'of', 'with', 'by', 'from', 'is', 'it', 'that', 'this', 'are', 'was',
+  'be', 'has', 'had', 'have', 'do', 'does', 'did', 'will', 'would', 'could',
+  'should', 'may', 'might', 'can', 'shall', 'not', 'no', 'all', 'each',
+  'every', 'how', 'what', 'where', 'when', 'who', 'which', 'why',
+  'i', 'me', 'my', 'we', 'our', 'you', 'your', 'he', 'she', 'they',
+  'find', 'show', 'get', 'list', 'give', 'tell',
+]);
+
+/**
+ * Split a lowercased query into terms (non-word characters other than '-' become separators), dropping 1-char terms and stop words
+ */
+export function extractSearchTerms(query: string): string[] {
+  return query
+    .toLowerCase()
+    .replace(/[^\w\s-]/g, ' ')
+    .split(/\s+/)
+    .filter(term => term.length > 1 && !STOP_WORDS.has(term));
+}
+
+/**
+ * Score how relevant a file path is to a query by summing per-term substring hits.
+ * Higher score = more relevant path; 0 when the query yields no search terms.
+ */
+export function scorePathRelevance(filePath: string, query: string): number {
+  const terms = extractSearchTerms(query);
+  if (terms.length === 0) return 0;
+
+  const pathLower = filePath.toLowerCase();
+  const fileName = path.basename(filePath).toLowerCase();
+  const dirName = path.dirname(filePath).toLowerCase();
+  let score = 0;
+
+  for (const term of terms) {
+    // Filename contains the term (substring match, strongest signal)
+    if (fileName.includes(term)) score += 10;
+    // Directory part contains the term
+    if (dirName.includes(term)) score += 5;
+    // NOTE(review): else-branch of the directory check only, so a filename-only hit also adds 3 — confirm intended
+    else if (pathLower.includes(term)) score += 3;
+  }
+
+  return score;
+}
+
+/**
+ * Kind-based bonus for search ranking (functions/classes rank above variables/imports); unlisted kinds score 0.
+ * NOTE(review): plain-object lookup, so a kind equal to an inherited key such as 'constructor' would skip the ?? 0 fallback.
+ */
+export function kindBonus(kind: Node['kind']): number {
+  const bonuses: Record<string, number> = {
+    function: 10,
+    method: 10,
+    class: 8,
+    interface: 7,
+    type_alias: 6,
+    struct: 6,
+    trait: 6,
+    enum: 5,
+    component: 8,
+    route: 9,
+    module: 4,
+    property: 3,
+    field: 3,
+    variable: 2,
+    constant: 3,
+    import: 1,
+    export: 1,
+    parameter: 0,
+    namespace: 4,
+    file: 0,
+    protocol: 6,
+    enum_member: 3,
+  };
+  return bonuses[kind] ?? 0;
+}

+ 8 - 0
src/utils.ts

@@ -166,6 +166,14 @@ export function clamp(value: number, min: number, max: number): number {
   return Math.max(min, Math.min(max, value));
 }
 
+/**
+ * Replace every backslash in a path with a forward slash (no other normalization is done).
+ * Fixes Windows backslash paths so glob matching works consistently.
+ */
+export function normalizePath(filePath: string): string {
+  return filePath.replace(/\\/g, '/');
+}
+
 /**
  * Cross-process file lock using a lock file with PID tracking.
  *