Преглед на файлове

feat(resolution): infer local-variable receiver types across languages (#1108) (#1109)

Instance calls through a local variable — `const lg = new Logger();
lg.log();` — only resolved to the method in C++. Every other language
produced no `calls` edge, because the resolver had no way to learn the
receiver variable's type, so such calls were missing from callers,
impact/blast-radius, and explore flow traces.

Local variables aren't indexed as nodes (node-explosion), so — like the
existing C++ inferrer — this reads the enclosing function's source and
matches the receiver's declaration/initializer to recover its type, then
hands it to resolveMethodOnType. That validates the method actually
exists on the inferred type, so a mis-inference yields no edge, which is
what lets the per-language patterns stay simple. The scan is bounded to
the enclosing scope so a same-named variable in another function can't
leak in.

Generalizes the C++-only path in matchMethodCall into a language dispatch:
C++ keeps its dedicated header-aware inferrer; a new shared
inferLocalReceiverType covers TypeScript, JavaScript, Python, Java, C#,
Kotlin, Swift, Go, Rust, Dart, Scala, and PHP, matching each language's
declaration shapes (`= new T`, `= T(...)`, `= T.new`, `let x = T{}`,
`x := T{}`, `T x = ...`, `x: T`, etc.). For Java/Kotlin an import FQN
still pins which same-named class is meant (#314); other languages fall
back to the call-site's own file (#1079).

Ruby is not covered: its extractor emits no `receiver.method()` call
reference in the first place, so there is nothing for resolution to
resolve — a separate extraction-layer gap.

Adds a parameterized end-to-end test covering all twelve languages. Full
suite green.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Colby Mchenry преди 1 ден
родител
ревизия
ed64db08b4
променени са 3 файла, в които са добавени 237 реда и са изтрити 3 реда
  1. 4 0
      CHANGELOG.md
  2. 54 0
      __tests__/resolution.test.ts
  3. 179 3
      src/resolution/name-matcher.ts

+ 4 - 0
CHANGELOG.md

@@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+### New Features
+
+- Method calls made through a local variable now resolve to the method in many more languages. When code does `const logger = new Logger(); logger.log();` (or the equivalent), CodeGraph infers the local variable's type from its declaration or initializer and links the call to the right method — so these calls now show up in callers, impact/blast-radius, and `codegraph_explore` flow traces instead of being dropped. Previously only C++ handled this; it now also covers TypeScript, JavaScript, Python, Java, C#, Kotlin, Swift, Go, Rust, Dart, Scala, and PHP. (#1108)
+
 ### Fixes
 
 - Indexing a large project no longer gets killed partway through with a "Main thread unresponsive — killing the wedged process" message. The safety watchdog that stops a genuinely stuck index was mistaking slow-but-normal work for a hang: on a big repo, linking up references and cross-file relationships can legitimately run for a while, and that work now regularly yields so the watchdog can tell real progress from a true stall. Projects that previously failed to finish `codegraph init` / `codegraph index` (and had to fall back to `CODEGRAPH_NO_WATCHDOG=1`) now complete normally, while a genuinely hung process is still caught. Thanks @zmcrazy, @YoungLiao, and @GeeLab-Mob for the reports. (#1091)

+ 54 - 0
__tests__/resolution.test.ts

@@ -1650,6 +1650,60 @@ func main() {
     });
   });
 
+  describe('Local-variable receiver-type inference (#1108)', () => {
+    // Receiver-type inference for locals: in `lg.log()` the type of `lg` is
+    // recovered from its declaration/initializer. Only C++ resolved such calls
+    // before; every other language produced no method edge. Each fixture is a
+    // single file holding one Logger plus one caller that goes through a local
+    // variable, so `log` gaining any caller means the resolution worked.
+    type ReceiverCase = { lang: string; file: string; src: string };
+    const cases: ReceiverCase[] = [
+      {
+        lang: 'TypeScript (= new T)',
+        file: 'svc.ts',
+        src: `class Logger { log() { return 1; } }\nexport function use() { const lg = new Logger(); return lg.log(); }\n`,
+      },
+      {
+        lang: 'JavaScript (= new T)',
+        file: 'svc.js',
+        src: `class Logger { log() { return 1; } }\nexport function use() { const lg = new Logger(); return lg.log(); }\n`,
+      },
+      {
+        lang: 'Python (= T())',
+        file: 'svc.py',
+        src: `class Logger:\n    def log(self):\n        return 1\ndef use():\n    lg = Logger()\n    return lg.log()\n`,
+      },
+      {
+        lang: 'Java (T x = new T)',
+        file: 'Svc.java',
+        src: `class Logger { void log() { int a = 1; } }\nclass Use { void run() { Logger lg = new Logger(); lg.log(); } }\n`,
+      },
+      {
+        lang: 'C# (var x = new T)',
+        file: 'Svc.cs',
+        src: `class Logger { void Log() { int a = 1; } }\nclass Use { void Run() { var lg = new Logger(); lg.Log(); } }\n`,
+      },
+      {
+        lang: 'Kotlin (val x = T())',
+        file: 'Svc.kt',
+        src: `class Logger { fun log(): Int { return 1 } }\nfun use(): Int { val lg = Logger(); return lg.log() }\n`,
+      },
+      {
+        lang: 'Swift (let x = T())',
+        file: 'svc.swift',
+        src: `class Logger { func log() -> Int { return 1 } }\nfunc use() -> Int { let lg = Logger(); return lg.log() }\n`,
+      },
+      {
+        lang: 'Go (x := T{})',
+        file: 'svc.go',
+        src: `package a\ntype Logger struct{}\nfunc (l Logger) Log() int { return 1 }\nfunc Use() int { lg := Logger{}; return lg.Log() }\n`,
+      },
+      {
+        lang: 'Rust (let x = T{})',
+        file: 'svc.rs',
+        src: `pub struct Logger { n: i32 }\nimpl Logger { pub fn log(&self) -> i32 { self.n } }\npub fn use_it() -> i32 { let lg = Logger { n: 1 }; lg.log() }\n`,
+      },
+      {
+        lang: 'Dart (var x = T())',
+        file: 'svc.dart',
+        src: `class Logger { int log() { return 1; } }\nint use() { var lg = Logger(); return lg.log(); }\n`,
+      },
+      {
+        lang: 'PHP ($x = new T)',
+        file: 'svc.php',
+        src: `<?php\nclass Logger { function log() { return 1; } }\nfunction useIt() { $lg = new Logger(); return $lg->log(); }\n`,
+      },
+      {
+        lang: 'Scala (val x = new T)',
+        file: 'Svc.scala',
+        src: `class Logger { def log(): Int = 1 }\nobject A { def use(): Int = { val lg = new Logger(); lg.log() } }\n`,
+      },
+    ];
+
+    cases.forEach(({ lang, file, src }) => {
+      it(`resolves a local-variable method call — ${lang}`, async () => {
+        const fixturePath = path.join(tempDir, file);
+        fs.writeFileSync(fixturePath, src);
+        cg = await CodeGraph.init(tempDir, { index: true });
+        cg.resolveReferences();
+
+        // Name comparison is case-insensitive because some fixtures spell the
+        // method `Log` and others `log`.
+        const methods = cg.getNodesByKind('method');
+        const logMethod = methods.find((n) => n.name.toLowerCase() === 'log');
+        expect(logMethod, `${lang}: log method should be indexed`).toBeDefined();
+
+        // At least one caller must have been linked to `log` via the local.
+        const callers = cg.getCallers(logMethod!.id).map((x) => x.node.name);
+        const failureHint = `${lang}: log should have a caller (got [${callers.join(', ')}])`;
+        expect(callers.length, failureHint).toBeGreaterThan(0);
+      });
+    });
+  });
+
   describe('Name Matcher: kind bias for new ref kinds', () => {
     const baseContext = (candidates: Node[]): ResolutionContext => ({
       getNodesInFile: () => [],

+ 179 - 3
src/resolution/name-matcher.ts

@@ -4,7 +4,7 @@
  * Handles symbol name matching for reference resolution.
  */
 
-import { Node } from '../types';
+import { Language, Node } from '../types';
 import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';
 
 /**
@@ -1019,6 +1019,165 @@ function inferJavaFieldReceiverType(
   return lastPart;
 }
 
+// ── Local-variable receiver-type inference (#1108) ──────────────────────────
+//
+// Instance calls through a local variable (`const lg = new Logger(); lg.log()`)
+// only resolved in C++ before this — no other language could learn the
+// receiver's type. Local variables are not indexed as nodes (node-explosion),
+// so, like the C++ inferrer above, we read the enclosing function's source and
+// match the receiver's declaration/initializer to recover its type. The type is
+// then handed to resolveMethodOnType, which VALIDATES that the type actually
+// declares the method, so a mis-inference produces NO edge — the safety net
+// that lets the patterns below stay simple. C++ keeps its dedicated inferrer
+// (header scan + `auto`); every other language is routed here, and a language
+// with no patterns in localReceiverTypePatterns simply gets no inference.
+
+// Keywords and literals that a loose receiver-type pattern might capture but
+// that are never a user-defined type. normalizeInferredTypeName rejects any
+// captured name found in this set. Membership is case-sensitive, which is why
+// both `true`/`True` and `false`/`False` are listed.
+const NON_TYPE_RECEIVER_TOKENS = new Set([
+  'this', 'self', 'super', 'new', 'return', 'await', 'yield', 'typeof',
+  'null', 'nil', 'None', 'true', 'false', 'True', 'False', 'undefined',
+]);
+
+/**
+ * Normalize a captured type expression to a simple type name.
+ *
+ * Steps, in order: drop generic argument lists (including nested ones), drop
+ * pointer/reference markers (`&`, `*`), take the last segment of a name
+ * qualified with `.`, `::` or `\`, and reject obvious non-types.
+ *
+ * @param raw Type expression as captured by a receiver-type pattern.
+ * @returns The bare type name, or null when nothing usable remains or the
+ *   name is one of NON_TYPE_RECEIVER_TOKENS.
+ */
+function normalizeInferredTypeName(raw: string): string | null {
+  // Strip generic argument lists innermost-first. A single pass that matches
+  // up to the first `>` would turn `Map<K, List<V>>` into `Map>`.
+  let withoutGenerics = raw;
+  let previous: string;
+  do {
+    previous = withoutGenerics;
+    withoutGenerics = withoutGenerics.replace(/<[^<>]*>/g, '');
+  } while (withoutGenerics !== previous);
+
+  const cleaned = withoutGenerics.replace(/[&*]/g, '').trim();
+
+  // `\` is included as a separator because the PHP pattern captures
+  // namespace-qualified names such as `\App\Logger`.
+  const seg = cleaned.split(/[.:\\]+/).filter(Boolean).pop();
+  if (!seg) return null;
+  if (NON_TYPE_RECEIVER_TOKENS.has(seg)) return null;
+  return seg;
+}
+
+/**
+ * Build the per-language regexes that recover a local variable's (or, for
+ * some languages, a typed parameter's) type from its declaration/initializer.
+ *
+ * @param language Language of the call site; selects the pattern set.
+ * @param r Receiver variable name, already regex-escaped by the caller, and
+ *   interpolated verbatim into every pattern.
+ * @returns Patterns ordered most-specific first, each capturing the type
+ *   expression in group 1; an empty array for languages with no patterns.
+ *
+ * PascalCase is required in the capture where the language convention allows,
+ * as a cheap false-positive guard on top of resolveMethodOnType's validation.
+ * The `= new T` patterns and both Go patterns accept a lowercase first
+ * character.
+ *
+ * NOTE(review): the PHP pattern has no left boundary before the optional `$`,
+ * so it looks like it can also match a longer variable name that merely ends
+ * with the receiver name — confirm whether that is intended.
+ * NOTE(review): patterns that begin with `\b` presumably cannot match a
+ * receiver whose name starts with `$` (legal in JS/TS) — verify.
+ */
+function localReceiverTypePatterns(language: Language, r: string): RegExp[] {
+  switch (language) {
+    case 'typescript':
+    case 'javascript':
+    case 'tsx':
+    case 'jsx':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_$][\\w.$]*)`), // = new Logger()
+        new RegExp(`\\b(?:const|let|var)\\s+${r}\\s*:\\s*([A-Z][\\w.$]*)`), // lg: Logger
+      ];
+    case 'python':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // lg = Logger(...)
+        new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // lg: Logger  (PEP 526)
+      ];
+    case 'java':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_][\\w.]*)`), // = new Logger()
+        new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;,)]`), // Logger lg;  / param
+      ];
+    case 'kotlin':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // val lg = Logger(...)
+        new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // val lg: Logger  / param
+      ];
+    case 'csharp':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_][\\w.]*)`), // = new Logger()
+        new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;,)]`), // Logger lg;  / param
+      ];
+    case 'swift':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // let lg = Logger(...)
+        new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // let lg: Logger  / param
+      ];
+    case 'rust':
+      return [
+        new RegExp(`\\blet\\s+(?:mut\\s+)?${r}\\b(?:\\s*:[^=]+)?=\\s*&?(?:mut\\s+)?([A-Z][\\w]*)`), // let lg = Logger::new()/Logger{}/Logger
+        new RegExp(`\\blet\\s+(?:mut\\s+)?${r}\\s*:\\s*&?(?:mut\\s+)?([A-Z][\\w]*)`), // let lg: Logger
+      ];
+    case 'go':
+      return [
+        new RegExp(`\\b${r}\\b\\s*:=\\s*&?([A-Za-z_][\\w.]*)\\s*{`), // lg := Logger{} / &Logger{}
+        new RegExp(`\\bvar\\s+${r}\\s+\\*?([A-Za-z_][\\w.]*)`), // var lg Logger / *Logger
+      ];
+    case 'ruby':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w:]*)\\.new\\b`), // lg = Logger.new
+      ];
+    case 'scala':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*(?:new\\s+)?([A-Z][\\w.]*)`), // val lg = new Logger / Logger(...)
+        new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // val lg: Logger  / param
+      ];
+    case 'dart':
+      return [
+        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // var lg = Logger(...)
+        new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;]`), // Logger lg = ...
+      ];
+    case 'php':
+      return [
+        new RegExp(`\\$?${r}\\b\\s*=\\s*new\\s+([A-Za-z_\\\\][\\w\\\\]*)`), // $lg = new Logger()
+      ];
+    default:
+      return [];
+  }
+}
+
+/**
+ * Find where the innermost function/method around the call begins.
+ *
+ * Only `function` and `method` nodes in the call's file and language are
+ * considered. Among those whose line range contains `ref.line`, the one that
+ * starts latest wins. A node without `endLine` is treated as a single line.
+ *
+ * @returns The 1-based start line, or 1 when no such node encloses the call.
+ */
+function enclosingScopeStartLine(ref: UnresolvedRef, context: ResolutionContext): number {
+  let tightestStart = 1;
+  for (const node of context.getNodesInFile(ref.filePath)) {
+    const isCallable = node.kind === 'function' || node.kind === 'method';
+    if (!isCallable || node.language !== ref.language) continue;
+
+    const lastLine = node.endLine ?? node.startLine;
+    const enclosesCall = node.startLine <= ref.line && ref.line <= lastLine;
+    if (enclosesCall) {
+      // A later start among enclosing nodes means a more deeply nested scope.
+      tightestStart = Math.max(tightestStart, node.startLine);
+    }
+  }
+  return tightestStart;
+}
+
+/**
+ * Recover the type of a receiver variable from its declaration/initializer.
+ *
+ * The call site's file is read and its lines are walked backward from the
+ * call line to the start line reported by enclosingScopeStartLine, so lines
+ * before that start are never consulted. Each line is tested against the
+ * language's patterns in order; the first capture that normalizes to a type
+ * name is returned, so the declaration nearest the call wins.
+ *
+ * @param receiverName Variable the method is called on (escaped here before
+ *   being placed into the patterns).
+ * @param ref The unresolved call reference (file, line, language).
+ * @param context Resolution context used to read the file and list nodes.
+ * @returns The inferred simple type name, or null when the language has no
+ *   patterns, the file cannot be read, or no declaration matches.
+ */
+function inferLocalReceiverType(
+  receiverName: string,
+  ref: UnresolvedRef,
+  context: ResolutionContext,
+): string | null {
+  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const patterns = localReceiverTypePatterns(ref.language, escapedReceiver);
+  if (patterns.length === 0) return null;
+
+  const source = context.readFile(ref.filePath);
+  if (!source) return null;
+
+  const lines = source.split(/\r?\n/);
+  // Clamp the call line into the file in case the reference is out of range.
+  const lastIdx = lines.length - 1;
+  const callIdx = Math.max(0, Math.min(lastIdx, ref.line - 1));
+  const scopeStartIdx = Math.max(0, enclosingScopeStartLine(ref, context) - 1);
+
+  let idx = callIdx;
+  while (idx >= scopeStartIdx) {
+    const text = lines[idx];
+    idx -= 1;
+    if (!text) continue;
+
+    for (const pattern of patterns) {
+      const captured = pattern.exec(text)?.[1];
+      if (!captured) continue;
+      const typeName = normalizeInferredTypeName(captured);
+      if (typeName) return typeName;
+    }
+  }
+  return null;
+}
+
 /**
  * Try to resolve by method name on a class/object
  */
@@ -1045,9 +1204,25 @@ export function matchMethodCall(
 
   const [, objectOrClass, methodName] = match;
 
-  if (ref.language === 'cpp' && dotMatch) {
-    const inferredType = inferCppReceiverType(objectOrClass!, ref, context);
+  // Infer the receiver's type from its local declaration/initializer in the
+  // enclosing scope, then resolve the method on that type (#1108). C++ keeps its
+  // dedicated inferrer (header scan + `auto`); every other language uses the
+  // shared source-based inferrer. resolveMethodOnType validates the method
+  // exists on the inferred type, so a mis-inference produces no edge.
+  if (dotMatch) {
+    const inferredType =
+      ref.language === 'cpp'
+        ? inferCppReceiverType(objectOrClass!, ref, context)
+        : inferLocalReceiverType(objectOrClass!, ref, context);
     if (inferredType) {
+      // Java/Kotlin: when two classes share the simple name, the file's import
+      // pins WHICH one (#314). Other languages disambiguate by call-site file.
+      const importedFqn =
+        ref.language === 'java' || ref.language === 'kotlin'
+          ? context
+              .getImportMappings(ref.filePath, ref.language)
+              .find((i) => i.localName === inferredType)?.source
+          : undefined;
       const typedMatch = resolveMethodOnType(
         inferredType,
         methodName!,
@@ -1055,6 +1230,7 @@ export function matchMethodCall(
         context,
         0.9,
         'instance-method',
+        importedFqn,
       );
       if (typedMatch) {
         return typedMatch;