Просмотр исходного кода

fix(extraction/ruby): build receiver.method calls so instance calls resolve (#1110) (#1111)

The Ruby extractor dropped the method name from a `receiver.method` call:
`lg.log()` was recorded as a call to `lg` (the bare receiver), which
matches no symbol, so the reference resolved to nothing and no method
edge was ever produced. A Ruby method invoked through a receiver had no
recorded callers and was invisible to impact/blast-radius and explore
flow traces. This is the Ruby-specific blocker noted in #1108 — that
local-variable type-inference fix couldn't help Ruby because the call
reference itself was missing.

extractCall recognized receiver-bearing calls by the `object`/`name`/
`function` fields other grammars use; tree-sitter-ruby's `call` node uses
`receiver` + `method`, so it fell through to the generic fallback that
takes the first named child (the receiver) as the callee. Handle Ruby
`call`/`method_call` explicitly: build `receiver.method`, keep bare
`foo(...)` as the method name, emit `Foo.new` as an `instantiates` ref,
and give a capitalized (constant) receiver a `references` edge so a class
used only via its class methods still records a dependent.

With this plus #1108, `lg = Logger.new; lg.log` resolves `lg.log` to
`Logger#log`, and when two files define the same name the call resolves
to the definition in the caller's own file (#1079). Adds Ruby to the
local-variable inference test matrix plus a focused test asserting
`Foo.new` stays an instantiation.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Colby Mchenry 1 день назад
Родитель
Commit
3424ff36c5
3 измененных файлов с 82 добавлено и 0 удалено
  1. 1 0
      CHANGELOG.md
  2. 24 0
      __tests__/resolution.test.ts
  3. 57 0
      src/extraction/tree-sitter.ts

+ 1 - 0
CHANGELOG.md

@@ -12,6 +12,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### New Features
 
 - Method calls made through a local variable now resolve to the method in many more languages. When code does `const logger = new Logger(); logger.log();` (or the equivalent), CodeGraph infers the local variable's type from its declaration or initializer and links the call to the right method — so these calls now show up in callers, impact/blast-radius, and `codegraph_explore` flow traces instead of being dropped. Previously only C++ handled this; it now also covers TypeScript, JavaScript, Python, Java, C#, Kotlin, Swift, Go, Rust, Dart, Scala, and PHP. (#1108)
+- Ruby method calls made on a receiver (`logger.log`) now record an edge to the method. Previously the Ruby indexer kept only the receiver and discarded the method name, so a method called through a variable or object had no recorded callers and was missing from impact/blast-radius and flow traces; combined with the local-variable type inference above, `logger = Logger.new; logger.log` now links to `Logger#log`. Calls to a class method (`Foo.bar`) and object construction (`Foo.new`) are still recorded too. (#1110)
 
 ### Fixes
 

+ 24 - 0
__tests__/resolution.test.ts

@@ -1681,6 +1681,8 @@ func main() {
         src: `<?php\nclass Logger { function log() { return 1; } }\nfunction useIt() { $lg = new Logger(); return $lg->log(); }\n` },
       { lang: 'Scala (val x = new T)', file: 'Svc.scala',
         src: `class Logger { def log(): Int = 1 }\nobject A { def use(): Int = { val lg = new Logger(); lg.log() } }\n` },
+      { lang: 'Ruby (x = T.new)', file: 'svc.rb',
+        src: `class Logger\n  def log\n    1\n  end\nend\ndef use\n  lg = Logger.new\n  lg.log\nend\n` },
     ];
 
     for (const c of cases) {
@@ -1702,6 +1704,28 @@ func main() {
         ).toBeGreaterThan(0);
       });
     }
+
+    it('Ruby: builds receiver.method and keeps Foo.new as an instantiation', async () => {
+      // The Ruby extractor previously took the receiver as the callee and
+      // dropped the method name (`lg.log()` -> a call to `lg`). Now it builds
+      // `lg.log`, while `Logger.new` must still record an instantiation.
+      fs.writeFileSync(
+        path.join(tempDir, 'svc.rb'),
+        `class Logger\n  def log\n    1\n  end\nend\ndef run\n  lg = Logger.new\n  lg.log\nend\n`,
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      cg.resolveReferences();
+
+      const run = cg.getNodesByKind('function').find((n) => n.name === 'run')!;
+      const logMethod = cg.getNodesByKind('method').find((n) => n.name === 'log')!;
+      const logger = cg.getNodesByKind('class').find((n) => n.name === 'Logger')!;
+      const out = cg.getOutgoingEdges(run.id);
+
+      // lg.log resolved to the method (the receiver-type inference kicked in).
+      expect(out.some((e) => e.kind === 'calls' && e.target === logMethod.id)).toBe(true);
+      // Logger.new is still an instantiation of the class.
+      expect(out.some((e) => e.kind === 'instantiates' && e.target === logger.id)).toBe(true);
+    });
   });
 
   describe('Name Matcher: kind bias for new ref kinds', () => {

+ 57 - 0
src/extraction/tree-sitter.ts

@@ -3497,6 +3497,63 @@ export class TreeSitterExtractor {
     const callerId = this.nodeStack[this.nodeStack.length - 1];
     if (!callerId) return;
 
+    // Ruby `call` nodes use `receiver` + `method` fields (tree-sitter-ruby), not
+    // the `object`/`name`/`function` fields the branches below expect — so
+    // without this they fell through to the generic path, which took the
+    // receiver as the callee and DROPPED the method name: `lg.log()` produced a
+    // `calls` ref to `lg` (unresolvable) and no method edge was ever recorded,
+    // so a Ruby method's callers/impact were invisible (#1108 follow-up). Build
+    // `receiver.method` so the resolver — and local-variable type inference —
+    // can link it; `Foo.new` stays an instantiation.
+    if (this.language === 'ruby' && (node.type === 'call' || node.type === 'method_call')) {
+      const methodNode = getChildByField(node, 'method');
+      const methodName = methodNode ? getNodeText(methodNode, this.source) : '';
+      if (!methodName) return; // operator/element-reference call with no method name
+      const receiverNode = getChildByField(node, 'receiver');
+      const line = node.startPosition.row + 1;
+      const column = node.startPosition.column;
+      if (!receiverNode) {
+        // Bare `foo(...)` — just the method name (unchanged behavior).
+        this.unresolvedReferences.push({ fromNodeId: callerId, referenceName: methodName, referenceKind: 'calls', line, column });
+        return;
+      }
+      const receiverName = getNodeText(receiverNode, this.source);
+      // `Foo.new` / `Foo::Bar.new` is construction — emit an `instantiates` ref to
+      // the class (last `::` segment), preserving the "what creates X" edge.
+      if (methodName === 'new') {
+        const className = receiverName.includes('::')
+          ? receiverName.slice(receiverName.lastIndexOf('::') + 2)
+          : receiverName;
+        if (/^[A-Z]/.test(className)) {
+          this.unresolvedReferences.push({ fromNodeId: callerId, referenceName: className, referenceKind: 'instantiates', line, column });
+          return;
+        }
+      }
+      const SKIP_RECEIVERS = new Set(['self', 'super']);
+      const skip = SKIP_RECEIVERS.has(receiverName);
+      this.unresolvedReferences.push({
+        fromNodeId: callerId,
+        referenceName: skip ? methodName : `${receiverName}.${methodName}`,
+        referenceKind: 'calls',
+        line,
+        column,
+      });
+      // A constant receiver — `Foo.bar`, a class/module method call — is itself a
+      // dependency on that constant; emit a `references` ref so a class used only
+      // via class methods still records a dependent (the edge the old receiver-only
+      // callee happened to provide). NOTE(review): `Foo::Bar.baz` may not match `constant`.
+      if (!skip && receiverNode.type === 'constant') {
+        this.unresolvedReferences.push({
+          fromNodeId: callerId,
+          referenceName: receiverName,
+          referenceKind: 'references',
+          line: receiverNode.startPosition.row + 1,
+          column: receiverNode.startPosition.column,
+        });
+      }
+      return;
+    }
+
     // Get the function/method being called
     let calleeName = '';