Bläddra i källkod

fix(mcp): resolve module-qualified symbol lookups (#173) (#179)

`codegraph_callees stage_apply::run` (and `_node`, `_impact`, ...)
returned "not found" against a repo with 7-9 sibling Rust modules,
each exporting `pub async fn run`. Two underlying issues:

1. The FTS5 query builder stripped `:` as a special char without
   splitting on `::`, so `stage_apply::run` collapsed to the literal
   `stage_applyrun` which matches nothing. Treat `::` as whitespace
   before the strip step so both halves become FTS tokens.

2. `matchesSymbol` only understood `Parent.child` qualifiers and
   relied on `qualifiedName` carrying the module path. Rust file-
   level functions don't have their module name in `qualifiedName`
   (it's encoded in the file path instead), so even dot-style
   lookups failed. Accept `::`, `.`, `/` as separators; multi-level
   forms compose; Rust `crate::`/`super::`/`self::` prefixes get
   stripped before path matching. Fall back to file-path containment
   when the qualified-name suffix doesn't match — `stage_apply::run`
   matches a `run` in any file whose path has a `stage_apply` segment.

Also tightens the no-match branch: qualified lookups no longer fall
through to a fuzzy text match. `stage_apply::nonexistent_fn` returns
`null` instead of silently resolving to an unrelated `rollback` in
the same file.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Colby Mchenry 1 månad sedan
förälder
incheckning
83f36dc170
4 ändrade filer med 312 tillägg och 18 borttagningar
  1. 23 0
      CHANGELOG.md
  2. 194 0
      __tests__/symbol-lookup.test.ts
  3. 7 1
      src/db/queries.ts
  4. 88 17
      src/mcp/tools.ts

+ 23 - 0
CHANGELOG.md

@@ -37,6 +37,29 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
   [#168](https://github.com/colbymchenry/codegraph/issues/168). Thanks to
   [@starkleek](https://github.com/starkleek) for the report and to
   [@Bortlesboat](https://github.com/Bortlesboat) for the initial PR.
+- **MCP / search**: module-qualified symbol lookups now resolve. The
+  MCP tools (`codegraph_node`, `codegraph_callees`, `codegraph_impact`,
+  …) accept `module::symbol` (Rust / C++ / Ruby), `Module.symbol`
+  (TS / JS / Python), and `module/symbol` (path-style) — multi-level
+  forms (`crate::configurator::stage_apply::run`) and Rust path
+  prefixes (`crate`, `super`, `self`) are handled. Two underlying
+  fixes:
+    - The FTS5 query builder now treats `::` as a token separator
+      instead of stripping it to nothing, so `stage_apply::run` no
+      longer collapses to the unsearchable `stage_applyrun`.
+    - `matchesSymbol` falls back to a file-path containment check when
+      `qualifiedName` doesn't carry the module hierarchy (Rust file-
+      level functions, Python free functions in a package): a `run`
+      in `src/configurator/stage_apply.rs` now matches
+      `stage_apply::run` because `stage_apply` appears as a path
+      segment.
+    - Qualified lookups that don't match the qualifier no longer fall
+      through to fuzzy text matches — `stage_apply::nonexistent_fn`
+      returns `null` instead of resolving to an unrelated `rollback`
+      in the same file.
+  Closes [#173](https://github.com/colbymchenry/codegraph/issues/173).
+  Thanks to [@joselhurtado](https://github.com/joselhurtado) for the
+  detailed reproduction.
 
 [0.7.10]: https://github.com/colbymchenry/codegraph/releases/tag/v0.7.10
 

+ 194 - 0
__tests__/symbol-lookup.test.ts

@@ -0,0 +1,194 @@
+/**
+ * Module-qualified symbol lookup (`stage_apply::run`, `Session.request`,
+ * `configurator/stage_apply`).
+ *
+ * Pinned because the lookup vocabulary is what makes codegraph useful
+ * in workspaces with same-named symbols across modules — Rust
+ * sub-pipelines, Python `__init__.py` packages, Java packages, etc.
+ * See #173 for the original report: a `run` function in
+ * `src/configurator/stage_apply.rs` was indexed but `stage_apply::run`
+ * returned "not found" because (a) FTS strips colons to nothing,
+ * leaving a useless query, and (b) `matchesSymbol` only understood
+ * `.`-style qualifiers.
+ */
+
+import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { initGrammars, loadAllGrammars } from '../src/extraction/grammars';
+
+beforeAll(async () => {
+  await initGrammars();
+  await loadAllGrammars();
+});
+
+function hasSqliteBindings(): boolean {
+  try {
+    const Database = require('better-sqlite3');
+    const db = new Database(':memory:');
+    db.close();
+    return true;
+  } catch {
+    return false;
+  }
+}
+const HAS_SQLITE = hasSqliteBindings();
+
+function tmpRoot(): string {
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-symbol-lookup-'));
+}
+
+function rmTree(dir: string): void {
+  if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+}
+
+async function buildRustWorkspace(): Promise<string> {
+  const root = tmpRoot();
+  const cfgDir = path.join(root, 'src', 'configurator');
+  fs.mkdirSync(cfgDir, { recursive: true });
+  fs.writeFileSync(
+    path.join(root, 'Cargo.toml'),
+    `[package]\nname = "fixture"\nversion = "0.1.0"\nedition = "2021"\n[lib]\npath = "src/lib.rs"\n`
+  );
+  fs.writeFileSync(path.join(root, 'src', 'lib.rs'), `pub mod configurator;\npub mod scheduler;\n`);
+  fs.writeFileSync(
+    path.join(cfgDir, 'mod.rs'),
+    `pub mod stage_apply;\npub mod stage_detect;\n`
+  );
+  fs.writeFileSync(
+    path.join(cfgDir, 'stage_apply.rs'),
+    `pub async fn run() -> Result<(), ()> {\n    render_and_write();\n    Ok(())\n}\n\nfn render_and_write() {}\n`
+  );
+  fs.writeFileSync(
+    path.join(cfgDir, 'stage_detect.rs'),
+    `pub async fn run() -> Result<(), ()> { Ok(()) }\n`
+  );
+  fs.writeFileSync(
+    path.join(root, 'src', 'scheduler.rs'),
+    `pub fn run_due_tasks() -> Result<(), ()> { Ok(()) }\n`
+  );
+  return root;
+}
+
+describe.skipIf(!HAS_SQLITE)('matchesSymbol — module-qualified lookups (#173)', () => {
+  let projectRoot: string;
+  let cg: any;
+  let handler: any;
+  let findSymbol: (cg: any, s: string) => { node: any; note: string } | null;
+  let findAllSymbols: (cg: any, s: string) => { nodes: any[]; note: string };
+
+  beforeEach(async () => {
+    projectRoot = await buildRustWorkspace();
+    const CodeGraph = (await import('../src/index')).default;
+    const { ToolHandler } = await import('../src/mcp/tools');
+    cg = CodeGraph.initSync(projectRoot, {
+      config: { include: ['**/*.rs'], exclude: [] },
+    });
+    await cg.indexAll();
+    handler = new ToolHandler(cg);
+    findSymbol = (handler as any).findSymbol.bind(handler);
+    findAllSymbols = (handler as any).findAllSymbols.bind(handler);
+  });
+
+  afterEach(() => {
+    handler?.closeAll();
+    cg?.destroy();
+    rmTree(projectRoot);
+  });
+
+  it('resolves `stage_apply::run` to the run in stage_apply.rs (not stage_detect.rs)', () => {
+    const match = findSymbol(cg, 'stage_apply::run');
+    expect(match).not.toBeNull();
+    expect(match!.node.name).toBe('run');
+    expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/);
+  });
+
+  it('rejects `stage_apply::run` for the same-named function in a different module', () => {
+    const all = findAllSymbols(cg, 'stage_apply::run');
+    // All returned nodes must be in stage_apply.rs — never in stage_detect.rs
+    for (const node of all.nodes) {
+      expect(node.filePath).toMatch(/stage_apply\.rs$/);
+    }
+    expect(all.nodes.length).toBeGreaterThan(0);
+  });
+
+  it('resolves `configurator::stage_apply::run` (multi-level qualifier)', () => {
+    const match = findSymbol(cg, 'configurator::stage_apply::run');
+    expect(match).not.toBeNull();
+    expect(match!.node.name).toBe('run');
+    expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/);
+  });
+
+  it('resolves `crate::configurator::stage_apply::run` (Rust path prefix stripped)', () => {
+    const match = findSymbol(cg, 'crate::configurator::stage_apply::run');
+    expect(match).not.toBeNull();
+    expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/);
+  });
+
+  it('resolves `configurator/stage_apply` (slash qualifier)', () => {
+    const match = findSymbol(cg, 'configurator/stage_apply/run');
+    expect(match).not.toBeNull();
+    expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/);
+  });
+
+  it('does not silently collide bare `run` with `run_due_tasks`', () => {
+    const match = findSymbol(cg, 'run');
+    expect(match).not.toBeNull();
+    // Whatever it picks, it must be an exact-name match, not a partial.
+    expect(match!.node.name).toBe('run');
+  });
+
+  it('aggregates all bare-name `run` matches across modules', () => {
+    const all = findAllSymbols(cg, 'run');
+    const names = all.nodes.map((n: any) => n.name);
+    expect(names.every((n: string) => n === 'run')).toBe(true);
+    expect(all.nodes.length).toBeGreaterThanOrEqual(2); // stage_apply + stage_detect
+    // The note should call out the ambiguity.
+    expect(all.note).toMatch(/Aggregated|symbols named "run"/);
+  });
+
+  it('still returns null for genuinely unknown qualified lookups', () => {
+    const match = findSymbol(cg, 'stage_apply::nonexistent_fn');
+    expect(match).toBeNull();
+  });
+});
+
+describe.skipIf(!HAS_SQLITE)('matchesSymbol — dotted lookups (regression for #173 fix)', () => {
+  let projectRoot: string;
+  let cg: any;
+  let handler: any;
+  let findSymbol: (cg: any, s: string) => { node: any; note: string } | null;
+
+  beforeEach(async () => {
+    projectRoot = tmpRoot();
+    const src = path.join(projectRoot, 'src');
+    fs.mkdirSync(src, { recursive: true });
+    fs.writeFileSync(
+      path.join(src, 'session.ts'),
+      `export class Session {\n  request(): void {}\n}\nexport function request(): void {}\n`
+    );
+
+    const CodeGraph = (await import('../src/index')).default;
+    const { ToolHandler } = await import('../src/mcp/tools');
+    cg = CodeGraph.initSync(projectRoot, {
+      config: { include: ['src/**/*.ts'], exclude: [] },
+    });
+    await cg.indexAll();
+    handler = new ToolHandler(cg);
+    findSymbol = (handler as any).findSymbol.bind(handler);
+  });
+
+  afterEach(() => {
+    handler?.closeAll();
+    cg?.destroy();
+    rmTree(projectRoot);
+  });
+
+  it('`Session.request` resolves to the method, not the bare function', () => {
+    const match = findSymbol(cg, 'Session.request');
+    expect(match).not.toBeNull();
+    expect(match!.node.kind).toBe('method');
+    expect(match!.node.qualifiedName).toContain('Session::request');
+  });
+});

+ 7 - 1
src/db/queries.ts

@@ -696,8 +696,14 @@ export class QueryBuilder {
     const { kinds, languages, limit = 100, offset = 0 } = options;
 
     // Add prefix wildcard for better matching (e.g., "auth" matches "AuthService", "authenticate")
-    // Escape special FTS5 characters and add prefix wildcard
+    // Escape special FTS5 characters and add prefix wildcard.
+    //
+    // `::` is a qualifier separator in Rust/C++/Ruby, not a token char,
+    // so treat it as whitespace before the strip step. Otherwise queries
+    // like `stage_apply::run` collapse to `stage_applyrun` (the colons
+    // are stripped without splitting) and find nothing. See #173.
     const ftsQuery = query
+      .replace(/::/g, ' ') // Rust/C++/Ruby qualifier separator
       .replace(/['"*():^]/g, '') // Remove FTS5 special chars
       .split(/\s+/)
       .filter(term => term.length > 0)

+ 88 - 17
src/mcp/tools.ts

@@ -16,6 +16,21 @@ import { WASM_FALLBACK_FIX_RECIPE } from '../db';
 /** Maximum output length to prevent context bloat (characters) */
 const MAX_OUTPUT_LENGTH = 15000;
 
+/**
+ * Rust path roots that have no file-system equivalent — `crate` is the
+ * current crate, `super` is the parent module, `self` is the current
+ * module. Used by `matchesSymbol` to strip these before file-path
+ * matching so `crate::configurator::stage_apply::run` resolves the
+ * same as `configurator::stage_apply::run`.
+ */
+const RUST_PATH_PREFIXES = new Set(['crate', 'super', 'self']);
+
+/** Last `::` / `.` / `/`-separated segment of a qualified symbol. */
+function lastQualifierPart(symbol: string): string {
+  const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0);
+  return parts[parts.length - 1] ?? symbol;
+}
+
 /**
  * Calculate the recommended number of codegraph_explore calls based on project size.
  * Larger codebases need more exploration calls to cover their surface area,
@@ -1204,9 +1219,22 @@ export class ToolHandler {
    * Returns the best match and a note about alternatives if any.
    */
   /**
-   * Check if a node matches a symbol query, supporting both simple names and
-   * qualified "Parent.child" notation (e.g., "Session.request" matches a method
-   * named "request" inside a class named "Session").
+   * Check if a node matches a symbol query.
+   *
+   * Accepts simple names (`run`) and three flavors of qualifier:
+   *   - dotted     `Session.request`         (TS/JS/Python)
+   *   - colon-pair `stage_apply::run`        (Rust, C++, Ruby)
+   *   - slash      `configurator/stage_apply` (path-ish)
+   *
+   * Multi-level qualifiers compose: `crate::configurator::stage_apply::run`
+   * works. Rust path prefixes (`crate`, `super`, `self`) are stripped so
+   * the canonical `crate::module::symbol` form resolves.
+   *
+   * Resolution order, last part must always equal `node.name`:
+   *   1. Suffix-match against `qualifiedName` (handles class-scoped methods
+   *      where the extractor builds the qualified name from the AST stack)
+   *   2. File-path containment (handles file-derived modules in Rust/
+   *      Python — `stage_apply::run` matches a `run` in `stage_apply.rs`)
    */
   private matchesSymbol(node: Node, symbol: string): boolean {
     // Simple name match
@@ -1214,21 +1242,52 @@ export class ToolHandler {
     // File basename match (e.g., "product-card" matches "product-card.liquid")
     if (node.kind === 'file' && node.name.replace(/\.[^.]+$/, '') === symbol) return true;
 
-    // Qualified name match: "Parent.child" → look for "::Parent::child" in qualified_name
-    if (symbol.includes('.')) {
-      const parts = symbol.split('.');
-      const qualifiedSuffix = parts.join('::');
-      if (node.qualifiedName.includes(qualifiedSuffix)) return true;
-    }
-
-    return false;
+    // Qualified-name lookups: split on any supported separator. `\w` keeps
+    // identifier chars (incl. `_`) intact; everything else is treated as
+    // a separator we tolerate.
+    if (!/[.\/]|::/.test(symbol)) return false;
+    const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0);
+    if (parts.length < 2) return false;
+
+    const lastPart = parts[parts.length - 1]!;
+    if (node.name !== lastPart) return false;
+
+    // Stage 1: qualified-name suffix match. The extractor joins the
+    // semantic hierarchy with `::`, so `Session.request` and
+    // `Session::request` both become `Session::request` here.
+    const colonSuffix = parts.join('::');
+    if (node.qualifiedName.includes(colonSuffix)) return true;
+
+    // Stage 2: file-path containment. Rust modules and Python packages
+    // are not in `qualifiedName` — they're encoded in the file path. So
+    // `stage_apply::run` matches a `run` in any file whose path
+    // contains a `stage_apply` segment (with or without an extension).
+    //
+    // Filter out Rust path prefixes that have no file-system equivalent.
+    const containerHints = parts.slice(0, -1).filter((p) => !RUST_PATH_PREFIXES.has(p));
+    if (containerHints.length === 0) return false;
+
+    const segments = node.filePath.split('/').filter((s) => s.length > 0);
+    return containerHints.every((hint) =>
+      segments.some((seg) => seg === hint || seg.replace(/\.[^.]+$/, '') === hint)
+    );
   }
 
   private findSymbol(cg: CodeGraph, symbol: string): { node: Node; note: string } | null {
-    // Use higher limit for qualified lookups (e.g., "Session.request") since the
-    // target may rank lower in FTS when there are many partial matches
-    const limit = symbol.includes('.') ? 50 : 10;
-    const results = cg.searchNodes(symbol, { limit });
+    // Use higher limit for qualified lookups (e.g., "Session.request",
+    // "stage_apply::run") since the target may rank lower in FTS when
+    // there are many partial matches across the qualifier parts.
+    const isQualified = /[.\/]|::/.test(symbol);
+    const limit = isQualified ? 50 : 10;
+    let results = cg.searchNodes(symbol, { limit });
+
+    // FTS strips colons as a special char, so `stage_apply::run` searches
+    // for the literal `stage_applyrun` and finds nothing. Re-search by
+    // the bare last part and let `matchesSymbol` filter by qualifier.
+    if (isQualified && results.length === 0) {
+      const tail = lastQualifierPart(symbol);
+      if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit });
+    }
 
     if (results.length === 0 || !results[0]) {
       return null;
@@ -1250,7 +1309,11 @@ export class ToolHandler {
       return { node: picked, note };
     }
 
-    // No exact match, use best fuzzy match
+    // No exact match. For qualified lookups, don't silently fall back
+    // to a fuzzy result — the user typed a specific qualifier, and
+    // resolving `stage_apply::nonexistent_fn` to the unrelated
+    // `stage_apply.rs` file would be actively misleading (#173).
+    if (isQualified) return null;
     return { node: results[0]!.node, note: '' };
   }
 
@@ -1259,7 +1322,15 @@ export class ToolHandler {
    * results across all matching symbols (e.g., multiple classes with an `execute` method).
    */
   private findAllSymbols(cg: CodeGraph, symbol: string): { nodes: Node[]; note: string } {
-    const results = cg.searchNodes(symbol, { limit: 50 });
+    let results = cg.searchNodes(symbol, { limit: 50 });
+
+    // Mirror the fallback in `findSymbol` for qualified queries — FTS
+    // strips colons, so a module-qualified lookup needs a second pass
+    // by the bare last part.
+    if (results.length === 0 && /[.\/]|::/.test(symbol)) {
+      const tail = lastQualifierPart(symbol);
+      if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit: 50 });
+    }
 
     if (results.length === 0) {
       return { nodes: [], note: '' };