/**
 * Name Matcher
 *
 * Handles symbol name matching for reference resolution.
 */

import { Language, Node } from '../types';
import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';

/**
 * Upper bound on the number of same-named definitions a FUZZY name-match
 * strategy is willing to score. A name with more definitions than this is
 * "ubiquitous" — a method/symbol re-declared across a vendored theme or SDK
 * (e.g. `init`/`update`/`render` on every widget of a committed Metronic
 * theme — #999). No directory-proximity or receiver-word-overlap score can
 * reliably pick THE one true target among thousands, so the fuzzy strategies
 * (matchByExactName's findBestMatch, and matchMethodCall Strategy 3) decline
 * above the ceiling instead of emitting a low-confidence, almost-certainly-wrong
 * edge.
 *
 * The ceiling also caps their per-ref cost at O(ceiling): without it, K
 * same-named refs each scored K candidates — the O(K²) blow-up that pinned a
 * core for 15-28 min at "Resolving refs … 94%" on a repo vendoring a large
 * JS/TS theme (#999).
 *
 * The PRECISE strategies are unaffected: qualified-name, import-based, and
 * class-name (Strategy 1/2) resolution all still run and resolve a ubiquitous
 * name when the context names its exact target. Real repos top out near ~40
 * same-named methods, so a normal codebase never reaches this; only
 * bulk-vendored code does. Tune via `CODEGRAPH_AMBIGUOUS_NAME_CEILING`.
 */
const DEFAULT_AMBIGUOUS_NAME_CEILING = 500;

/**
 * Read the ceiling from `CODEGRAPH_AMBIGUOUS_NAME_CEILING`.
 *
 * @returns the base-10 integer parsed from the variable when it is set,
 *   finite and strictly positive; otherwise the built-in default.
 */
function resolveAmbiguousNameCeiling(): number {
  const override = process.env.CODEGRAPH_AMBIGUOUS_NAME_CEILING;
  if (override) {
    const ceiling = Number.parseInt(override, 10);
    if (Number.isFinite(ceiling) && ceiling > 0) {
      return ceiling;
    }
  }
  return DEFAULT_AMBIGUOUS_NAME_CEILING;
}

// Resolved once, when the module is loaded.
const AMBIGUOUS_NAME_CEILING = resolveAmbiguousNameCeiling();

/**
 * Try to resolve a path-like reference (e.g., "snippets/drawer-menu.liquid")
 * by matching the filename against file nodes.
*/
export function matchByFilePath(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Path-like (`a/b.liquid`) OR a bare filename ending in a short extension
  // (`Foo.h` — an Objective-C `#import "Foo.h"`, resolved to the header by
  // basename). A bare ref WITHOUT an extension is a symbol name, not a file, so
  // leave it to the symbol-matching strategies.
  if (!ref.referenceName.includes('/') && !/\.[A-Za-z][A-Za-z0-9]{0,3}$/.test(ref.referenceName)) {
    return null;
  }

  // Extract the filename from the path
  const fileName = ref.referenceName.split('/').pop();
  if (!fileName) return null;

  // Search for file nodes with this name
  const candidates = context.getNodesByName(fileName);
  const fileNodes = candidates.filter((n) => n.kind === 'file');

  if (fileNodes.length === 0) return null;

  // Prefer exact path match on qualified_name
  const exactMatch = fileNodes.find(
    (n) => n.qualifiedName === ref.referenceName || n.filePath === ref.referenceName
  );
  if (exactMatch) {
    return {
      original: ref,
      targetNodeId: exactMatch.id,
      confidence: 0.95,
      resolvedBy: 'file-path',
    };
  }

  // Fall back to suffix match (e.g., ref="snippets/foo.liquid" matches
  // "src/snippets/foo.liquid"). The suffix must start on a path-segment
  // boundary: ref "b/foo.liquid" must NOT match "src/ab/foo.liquid". When
  // several files share the basename — a `#include "RNCAsyncStorage.h"` with a
  // same-named header on another platform (windows/code/ vs apple/) — prefer
  // the one in the includer's own directory, then by directory proximity /
  // same language family. A C/C++ include (and any bare-filename import)
  // resolves relative to the including file, not to an arbitrary same-named
  // header elsewhere in the tree.
  const suffixMatches = fileNodes.filter(
    (n) =>
      endsWithPathSegments(n.qualifiedName, ref.referenceName) ||
      endsWithPathSegments(n.filePath, ref.referenceName)
  );
  if (suffixMatches.length > 0) {
    return {
      original: ref,
      targetNodeId: pickClosestFileNode(suffixMatches, ref).id,
      confidence: 0.85,
      resolvedBy: 'file-path',
    };
  }

  // If only one file node with this name, use it with lower confidence
  if (fileNodes.length === 1) {
    return {
      original: ref,
      targetNodeId: fileNodes[0]!.id,
      confidence: 0.7,
      resolvedBy: 'file-path',
    };
  }

  return null;
}

/**
 * True when `path` equals `suffix` or ends with `suffix` starting at a `/`
 * boundary, so whole path segments are compared rather than raw characters.
 * A `suffix` that already begins with `/` carries its own boundary.
 */
function endsWithPathSegments(path: string, suffix: string): boolean {
  if (path === suffix) return true;
  return path.endsWith(suffix.startsWith('/') ? suffix : `/${suffix}`);
}

/**
 * Among several file nodes that all match a bare include/import by basename,
 * pick the one closest to the referencing file: same directory first, then by
 * directory-tree proximity, with the same language family as a tiebreak. A
 * C/C++ `#include "X.h"` (and any bare-filename import) resolves relative to the
 * including file — not to an arbitrary same-named header on another platform.
 *
 * @param candidates non-empty list of matching file nodes (the caller checks).
 * @param ref the reference whose `filePath`/`language` anchor the comparison.
 * @returns the best-scoring candidate; the first one wins a tie.
 */
function pickClosestFileNode(candidates: Node[], ref: UnresolvedRef): Node {
  const dirOf = (p: string): string => {
    const i = p.lastIndexOf('/');
    return i >= 0 ? p.slice(0, i) : '';
  };
  const refDir = dirOf(ref.filePath);
  // Same-directory candidates, when any exist, are the only ones scored.
  const sameDir = candidates.filter((c) => dirOf(c.filePath) === refDir);
  const pool = sameDir.length > 0 ? sameDir : candidates;

  let best = pool[0]!;
  let bestScore = -Infinity;
  for (const c of pool) {
    const score =
      computePathProximity(ref.filePath, c.filePath) +
      (sameLanguageFamily(c.language, ref.language) ? 5 : 0);
    if (score > bestScore) {
      bestScore = score;
      best = c;
    }
  }
  return best;
}

/**
 * Language families that share a type system / runtime, so a same-language-only
 * reference may still resolve across them (a Kotlin `Foo.BAR` can name a Java
 * `Foo`). Anything not listed forms its own singleton family.
*/
const LANGUAGE_FAMILY: Record<string, string> = {
  java: 'jvm',
  kotlin: 'jvm',
  scala: 'jvm',
  swift: 'apple',
  objc: 'apple',
  typescript: 'web',
  tsx: 'web',
  javascript: 'web',
  jsx: 'web',
  c: 'c',
  cpp: 'c',
  // Razor/Blazor markup names C# types — same family so `@model Foo` / a
  // `<Foo />` component tag resolve to their `.cs` class through the
  // cross-family gate.
  csharp: 'dotnet',
  razor: 'dotnet',
};

/**
 * Family tag for `lang`, or `undefined` when it is not listed. Only OWN keys
 * of the table count, so a string that happens to name an `Object.prototype`
 * member (`constructor`, `toString`, …) is not mistaken for a listed language.
 */
function languageFamilyOf(lang: string): string | undefined {
  return Object.prototype.hasOwnProperty.call(LANGUAGE_FAMILY, lang)
    ? LANGUAGE_FAMILY[lang]
    : undefined;
}

/**
 * True when `a` and `b` are the same language or belong to the same listed
 * family. An unlisted language is only ever in the same family as itself.
 */
export function sameLanguageFamily(a: string, b: string): boolean {
  if (a === b) return true;
  const familyOfA = languageFamilyOf(a);
  return familyOfA !== undefined && familyOfA === languageFamilyOf(b);
}

/**
 * True when `lang` belongs to a known multi-language family
 * (jvm/apple/web/c/dotnet). Languages not listed (php, python, go, ruby, rust,
 * dart, …) and config formats (yaml/xml/blade) form their own singleton
 * families and return `false` — used to leave config↔code framework bridges
 * (whose config side is never a known programming-language family) out of the
 * cross-family gate.
 */
export function isKnownLanguageFamily(lang: string): boolean {
  return languageFamilyOf(lang) !== undefined;
}

/**
 * True when `a` and `b` are two DIFFERENT *known* language families — the
 * signature of a coincidental cross-language name collision (a TS `import
 * React` matching a Swift `import React`, a C++ `#include "X.h"` matching a
 * same-named ObjC header on another platform). The both-*known* test is
 * deliberately weaker than {@link sameLanguageFamily}'s negation: a
 * single-file-component language that carries its own tag (`vue`/`svelte`)
 * importing a `.ts` module, or any singleton-family language (php/go/ruby/…),
 * returns `false` here and is left alone.
 */
export function crossesKnownFamily(a: string, b: string): boolean {
  return isKnownLanguageFamily(a) && isKnownLanguageFamily(b) && !sameLanguageFamily(a, b);
}

/**
 * Drop cross-language candidates from a name lookup.
* Two regimes:
 *  - `references` (type-usage): a type named in language X resolves to a
 *    SAME-family type, never a coincidentally same-named symbol in another
 *    language (the Android `BatteryManager` system class vs a JS one). Strict
 *    same-family filter — cross-language communication is `calls`, not refs.
 *    `function_ref` refs take this strict filter too.
 *  - `imports` (import binding): an `import`/`#include` never crosses two
 *    KNOWN families (TS `import React` ↮ Swift `import React`). Weaker
 *    both-known filter so `.vue`/`.svelte` (own tag) importing `.ts` survives.
 * Any other reference kind is returned unfiltered.
 */
function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
  if (ref.referenceKind === 'references' || ref.referenceKind === 'function_ref') {
    return candidates.filter((c) => sameLanguageFamily(c.language, ref.language));
  }
  if (ref.referenceKind === 'imports') {
    return candidates.filter((c) => !crossesKnownFamily(c.language, ref.language));
  }
  return candidates;
}

/**
 * Resolve a function-as-value reference (#756) — a function name used as a
 * callback/function-pointer value (`register(handler)`, `o->cb = handler`,
 * `{ .cb = handler }`, `signal(SIGINT, handler)`). The ONLY strategy allowed
 * for `function_ref` refs: exact name, function/method targets only, same
 * language family, same-file first, and cross-file only when the match is
 * UNIQUE. No fuzzy fallback, no qualified-name walking — a wrong callback
 * edge is worse than none.
 */
export function matchFunctionRef(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // `this.`-prefixed refs are resolved ONLY by the class-scoped resolver in
  // resolveOne (resolveThisMemberFnRef) — never by name matching here.
  if (ref.referenceName.startsWith('this.')) return null;

  // In JS/TS/Python a bare identifier can never be a method value (methods
  // are only reachable through a receiver — `this.m` / `self.m` /
  // `Cls.m`), so bare fn-refs match FUNCTIONS only. This also sidesteps the
  // pre-existing TS quirk of class fields extracting as method-kind nodes,
  // which otherwise soaked up local names passed as arguments (excalidraw
  // A/B finding; same pattern in vendored docopt.py). Python's `self.m`
  // form keeps method targets via its own capture shape. C++ likewise: a
  // bare identifier can only be a FREE function (member values need
  // `&Cls::method`). PHP string callables name global FUNCTIONS (methods
  // need the `[$obj, 'm']` array form, which carries its own shape). Other
  // languages keep method targets: C# method groups, Swift/Dart
  // implicit-self, Java/Kotlin method references.
  const bareFnOnly =
    ref.language === 'typescript' ||
    ref.language === 'tsx' ||
    ref.language === 'javascript' ||
    ref.language === 'jsx' ||
    ref.language === 'cpp' ||
    ref.language === 'python' ||
    ref.language === 'php';

  // Qualified member-pointer (`&Widget::on_click` → "Widget::on_click"):
  // resolve the member ON THAT SCOPE — exempt from bareFnOnly (the `&Cls::m`
  // shape is an explicit member reference). Unique-or-drop like everything else.
  if (ref.referenceName.includes('::')) {
    const memberName = ref.referenceName.slice(ref.referenceName.lastIndexOf('::') + 2);
    const scoped = context
      .getNodesByName(memberName)
      .filter(
        (n) =>
          (n.kind === 'function' || n.kind === 'method') &&
          sameLanguageFamily(n.language, ref.language) &&
          n.id !== ref.fromNodeId &&
          (n.qualifiedName === ref.referenceName ||
            n.qualifiedName.endsWith(`::${ref.referenceName}`))
      );
    if (scoped.length === 0) return null;
    const sameFileScoped = scoped.filter((n) => n.filePath === ref.filePath);
    const pool = sameFileScoped.length > 0 ? sameFileScoped : scoped;
    // Cross-file: only a unique match resolves.
    if (sameFileScoped.length === 0 && scoped.length > 1) return null;
    // Earliest definition by start line, for determinism.
    const target = pool.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
    return {
      original: ref,
      targetNodeId: target.id,
      confidence: 0.9,
      resolvedBy: 'function-ref',
    };
  }

  let candidates = context
    .getNodesByName(ref.referenceName)
    .filter(
      (n) =>
        (n.kind === 'function' || (!bareFnOnly && n.kind === 'method')) &&
        sameLanguageFamily(n.language, ref.language) &&
        n.id !== ref.fromNodeId // a function registering itself is not a dependency edge
    );
  if (candidates.length === 0) return null;

  // Swift implicit-self: a bare identifier can name a METHOD only of the
  // ENCLOSING type (`Button(action: handleTap)` written inside that type) —
  // a same-named method on any OTHER class is a parameter collision
  // (Alamofire: a `request` parameter resolving to EventMonitor::request).
  // Scope method candidates to the from-symbol's type; top-level code has no
  // implicit self, so method targets are excluded there entirely. Free
  // functions are unaffected.
  if (ref.language === 'swift' && candidates.some((n) => n.kind === 'method')) {
    const fromNode = context.getNodeById?.(ref.fromNodeId);
    const sep = fromNode ? fromNode.qualifiedName.lastIndexOf('::') : -1;
    const classPrefix = fromNode && sep > 0 ? fromNode.qualifiedName.slice(0, sep) : null;
    candidates = candidates.filter((n) => {
      if (n.kind !== 'method') return true;
      if (!classPrefix) return false;
      const mSep = n.qualifiedName.lastIndexOf('::');
      if (mSep <= 0) return false;
      const methodPrefix = n.qualifiedName.slice(0, mSep);
      // Accept exact-scope matches plus suffix relationships either way, so
      // extension-declared members (`Holder::m`) still match a nested
      // from-scope (`Module::Holder::wire`) and vice versa.
      return (
        methodPrefix === classPrefix ||
        methodPrefix.endsWith(`::${classPrefix}`) ||
        classPrefix.endsWith(`::${methodPrefix}`)
      );
    });
    if (candidates.length === 0) return null;
  }

  // Same-file definition wins — the extraction gate guarantees most survivors
  // have one, and it's the dominant C pattern (static callback registered in
  // a same-file ops struct).
  const sameFile = candidates.filter((n) => n.filePath === ref.filePath);
  if (sameFile.length > 0) {
    // Swift: several same-named METHODS in one file is an API overload family
    // (`Session.request(...)` × N), and a bare identifier hitting it is almost
    // always a same-named parameter, not a method value (Alamofire A/B
    // finding) — refuse rather than guess. A single method (SwiftUI's
    // `action: handleTap`) still resolves.
    if (
      ref.language === 'swift' &&
      sameFile.length > 1 &&
      sameFile.every((n) => n.kind === 'method')
    ) {
      return null;
    }
    // Same-name overloads in one file are the same conceptual symbol; pick
    // the first by position for determinism.
    const target = sameFile.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
    return {
      original: ref,
      targetNodeId: target.id,
      confidence: sameFile.length === 1 ? 0.95 : 0.9,
      resolvedBy: 'function-ref',
    };
  }

  // Cross-file (imported names the import resolver didn't already claim):
  // only an unambiguous match resolves.
  if (candidates.length === 1) {
    return {
      original: ref,
      targetNodeId: candidates[0]!.id,
      confidence: 0.8,
      resolvedBy: 'function-ref',
    };
  }
  return null;
}

/**
 * Try to resolve a reference by exact name match
 */
export function matchByExactName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // `import`-kind nodes are import STATEMENTS, not definitions, so a reference
  // resolving to a sibling file's `import` is a meaningless edge — the real
  // import→definition resolution is the import resolver's job (resolveViaImport),
  // never name-matching here. Excluding them also removes a quadratic blow-up:
  // a ubiquitous package (`react`, `@superset-ui/core`, Python `logging`/`typing`)
  // is re-declared as an `import` node in every file that imports it, so K
  // unresolved import refs each scored K same-named import candidates through
  // findBestMatch — O(K²) per package, the dominant cost of "Resolving refs" on
  // large import-heavy (front-end + back-end) repos (#915).
  const candidates = applyLanguageGate(context.getNodesByName(ref.referenceName), ref)
    .filter((n) => n.kind !== 'import');

  if (candidates.length === 0) {
    return null;
  }

  // If only one match, use it — but penalize cross-language matches
  if (candidates.length === 1) {
    const isCrossLanguage = candidates[0]!.language !== ref.language;
    return {
      original: ref,
      targetNodeId: candidates[0]!.id,
      confidence: isCrossLanguage ? 0.5 : 0.9,
      resolvedBy: 'exact-match',
    };
  }

  // Ubiquitous-name ceiling (#999): above it, picking one target among K
  // same-named defs by directory proximity is unreliable AND O(K) per ref — the
  // quadratic behind the "Resolving refs" wedge on theme/SDK-vendoring repos.
  // Decline; the precise strategies (qualified-name, import, class-name) already
  // ran. Falls through to fuzzy, which itself only resolves a UNIQUE candidate.
  if (candidates.length > AMBIGUOUS_NAME_CEILING) {
    return null;
  }

  // Multiple matches - try to narrow down
  const bestMatch = findBestMatch(ref, candidates, context);
  if (bestMatch) {
    // Lower confidence when the match is from a distant/unrelated module
    const proximity = computePathProximity(ref.filePath, bestMatch.filePath);
    const confidence = proximity >= 30 ? 0.7 : 0.4;
    return {
      original: ref,
      targetNodeId: bestMatch.id,
      confidence,
      resolvedBy: 'exact-match',
    };
  }

  return null;
}

// A character that can be part of an identifier; used to detect a suffix that
// begins in the MIDDLE of a name rather than on a qualifier boundary.
const IDENTIFIER_CHAR = /[\p{L}\p{N}_$]/u;

/**
 * True when `qualifiedName` ends with `suffix` AND the suffix starts on a
 * name boundary — at the very start, or right after a non-identifier
 * character (a `::`/`.` separator). Prevents `MyLogger::log` from matching a
 * reference to `Logger::log`. A suffix that itself begins with a separator
 * already carries its boundary and is accepted as-is.
 */
function endsWithQualifiedSuffix(qualifiedName: string, suffix: string): boolean {
  if (!qualifiedName.endsWith(suffix)) return false;
  const boundary = qualifiedName.length - suffix.length;
  if (boundary === 0) return true;
  if (!IDENTIFIER_CHAR.test(suffix.charAt(0))) return true;
  return !IDENTIFIER_CHAR.test(qualifiedName.charAt(boundary - 1));
}

/**
 * Try to resolve by qualified name
 */
export function matchByQualifiedName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Check if the reference name looks qualified (contains :: or .)
  if (!ref.referenceName.includes('::') && !ref.referenceName.includes('.')) {
    return null;
  }

  const candidates = context.getNodesByQualifiedName(ref.referenceName);

  if (candidates.length === 1) {
    return {
      original: ref,
      targetNodeId: candidates[0]!.id,
      confidence: 0.95,
      resolvedBy: 'qualified-name',
    };
  }

  // Several symbols share this exact qualified name (e.g. `Logger::log` declared
  // in two files — an ODR clash or separate translation units): prefer the one
  // in the call site's own file before the partial-match fallback below, else
  // the first-indexed def wins and a call in `b/svc` targets `a/svc` (#1079).
  if (candidates.length > 1) {
    const ordered = preferCallSiteFile(candidates, ref.filePath);
    if (ordered[0]!.filePath === ref.filePath) {
      return {
        original: ref,
        targetNodeId: ordered[0]!.id,
        confidence: 0.95,
        resolvedBy: 'qualified-name',
      };
    }
  }

  // Try partial qualified name match — again preferring the call site's own
  // file when more than one symbol's qualifiedName ends with the reference.
  // The reference must line up with a qualifier boundary, so a longer class
  // name that merely ends in the same letters is not a match.
  const parts = ref.referenceName.split(/[:.]/);
  const lastName = parts[parts.length - 1];
  if (lastName) {
    const partialCandidates = context
      .getNodesByName(lastName)
      .filter((candidate) => endsWithQualifiedSuffix(candidate.qualifiedName, ref.referenceName));
    const chosen = preferCallSiteFile(partialCandidates, ref.filePath)[0];
    if (chosen) {
      return {
        original: ref,
        targetNodeId: chosen.id,
        confidence: 0.85,
        resolvedBy: 'qualified-name',
      };
    }
  }

  return null;
}

/**
 * When a symbol name is ambiguous across files, prefer the candidate(s) declared
 * in the call site's own file, keeping the rest in their original order (#1079).
 * A same-file definition is the strongest language-agnostic signal for which of
 * several same-named symbols a call means; without it, resolution collapses onto
 * whichever was indexed first, so a call in `b/svc` wrongly targets `a/svc`.
 * No-op when there are <2 candidates or none share the call site's file.
 */
export function preferCallSiteFile(nodes: Node[], callSiteFile: string): Node[] {
  if (nodes.length < 2) return nodes;
  const same: Node[] = [];
  const other: Node[] = [];
  for (const n of nodes) {
    if (n.filePath === callSiteFile) same.push(n);
    else other.push(n);
  }
  return same.length ? [...same, ...other] : nodes;
}

/**
 * Resolve `methodName` on the type `typeName`, validating that such a method
 * node exists (on the type itself or, via the conformance fallback, on one of
 * its supertypes). Precedence among several matches: `preferredFqn` →
 * call-site file → first match.
 *
 * Exported for the precedence unit tests (#1079): they assert the
 * preferredFqn → same-file → matches[0] ordering directly.
 *
 * @param typeName simple or qualified name of the receiver type.
 * @param methodName name of the method being called.
 * @param ref the unresolved reference; its language and file drive filtering.
 * @param context graph lookups (`getNodesByName`, optional `getSupertypes`).
 * @param confidence confidence to stamp on the resulting edge.
 * @param resolvedBy strategy label to stamp on the resulting edge.
 * @returns the resolved reference, or null when no matching method exists.
 */
export function resolveMethodOnType(
  typeName: string,
  methodName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  confidence: number,
  resolvedBy: ResolvedRef['resolvedBy'],
  /**
   * Optional FQN that identifies WHICH class declaration `typeName`
   * refers to in the caller's file. When multiple candidates share
   * the same qualifiedName (`FooConverter::convert` in both
   * `dao/converter/` and `service/converter/`), the FQN's
   * file-path-suffix picks the right one — the disambiguation
   * signal Java imports carry but the call site doesn't (#314).
   */
  preferredFqn?: string,
  /** Recursion guard for the supertype/conformance walk. */
  depth = 0,
): ResolvedRef | null {
  // Look up methods by name and match by qualifiedName ending in
  // `<typeName>::<methodName>`. This works whether the method is defined
  // in-class (`class Foo { int bar() { ... } }`) or out-of-line in a separate
  // file (`int Foo::bar() { ... }` in foo.cpp while class Foo is in foo.hpp).
  // The previous same-file approach missed the latter — the typical C++ layout.
  const methodCandidates = context.getNodesByName(methodName);
  const want = `${typeName}::${methodName}`;
  const matches: Node[] = [];
  for (const m of methodCandidates) {
    if (m.kind !== 'method') continue;
    if (m.language !== ref.language) continue;
    const qn = m.qualifiedName;
    if (qn === want || qn.endsWith(`::${want}`)) {
      matches.push(m);
    }
  }

  if (matches.length === 0) {
    // Conformance fallback: the method may be defined on a supertype `typeName`
    // extends, or on a protocol / trait it conforms to (e.g. a Swift protocol-
    // extension method, a C# default-interface or extension method, a Kotlin
    // extension on a supertype). Walk supertypes transitively (depth-capped) via
    // the resolved implements/extends edges — empty in the first resolution pass,
    // populated in the conformance pass. Still VALIDATED (the method must exist on
    // a supertype), so a wrong inference produces no edge.
    if (depth < 4 && context.getSupertypes) {
      for (const supertype of context.getSupertypes(typeName, ref.language)) {
        const via = resolveMethodOnType(
          supertype,
          methodName,
          ref,
          context,
          confidence,
          resolvedBy,
          preferredFqn,
          depth + 1,
        );
        if (via) return via;
      }
    }
    return null;
  }

  if (matches.length > 1 && preferredFqn) {
    const ext = ref.language === 'kotlin' ? '.kt' : '.java';
    const fqnPath = preferredFqn.replace(/\./g, '/') + ext;
    const chosen = matches.find((m) => {
      const fp = m.filePath.replace(/\\/g, '/');
      // Whole path segments only: `dao/Foo.java` must not pick `mydao/Foo.java`.
      return fp === fqnPath || fp.endsWith('/' + fqnPath);
    });
    if (chosen) {
      return {
        original: ref,
        targetNodeId: chosen.id,
        confidence,
        resolvedBy,
      };
    }
  }

  // Language-agnostic disambiguation: when several same-named methods survive
  // (e.g. two files each declaring `class Logger { void log(); }` — an ODR
  // clash, an anonymous-namespace type, or separate translation units), prefer
  // the definition in the CALL SITE's own file. Without this, every ambiguous
  // call collapses onto the first-indexed definition, so a call in `b/svc.cpp`
  // wrongly points at `a/svc.cpp` (#1079). This runs AFTER the `preferredFqn`
  // block, so Java/Kotlin import disambiguation — whose target is intentionally
  // in ANOTHER file (#314) — is unaffected: that block returns early whenever
  // an import FQN pins the class.
  const ordered = preferCallSiteFile(matches, ref.filePath);
  return {
    original: ref,
    targetNodeId: ordered[0]!.id,
    confidence,
    resolvedBy,
  };
}

// C++ keywords/control-flow tokens that can appear right before a receiver
// (e.g. `return ptr->m()`) and must NOT be treated as a type.
const CPP_NON_TYPE_TOKENS = new Set<string>([
  'return', 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
  'break', 'continue', 'goto', 'throw', 'new', 'delete', 'co_await',
  'co_yield', 'co_return', 'static_cast', 'const_cast', 'dynamic_cast',
  'reinterpret_cast', 'sizeof', 'alignof', 'typeid', 'and', 'or', 'not', 'xor',
]);

/**
 * Reduce a captured C++ declarator type to its bare class name.
 *
 * Strips cv/elaboration keywords (`const`, `volatile`, `mutable`, `typename`,
 * `class`, `struct`), pointer/reference markers and template argument lists,
 * then keeps the last `::`-separated segment.
 *
 * @param typeName raw type text, e.g. `const std::vector<int>&`.
 * @returns the bare name (`vector`), or null when nothing is left or the
 *   result is a keyword from CPP_NON_TYPE_TOKENS (e.g. `return`).
 */
function normalizeCppTypeName(typeName: string): string | null {
  let stripped = typeName
    .replace(/\b(const|volatile|mutable|typename|class|struct)\b/g, ' ')
    .replace(/[&*]+/g, ' ');

  // Remove template argument lists innermost-first so NESTED templates
  // (`std::map<int, std::vector<int>>`) are removed completely; a single
  // `<[^>]*>` pass stops at the first `>` and leaves a stray `>` behind.
  let previous: string;
  do {
    previous = stripped;
    stripped = stripped.replace(/<[^<>]*>/g, ' ');
  } while (stripped !== previous);

  const normalized = stripped.replace(/\s+/g, ' ').trim();
  if (!normalized) return null;

  const parts = normalized.split(/::/).filter(Boolean);
  const last = parts[parts.length - 1];
  if (!last) return null;
  if (CPP_NON_TYPE_TOKENS.has(last)) return null;
  return last;
}

// Declarator regex: matches `Type receiver`, `Type* receiver`, `Type *receiver`,
// `Type*receiver`, `Type<T> receiver`, etc., REQUIRING a declarator terminator
// (`;`, `=`, `,`, `)`, `[`, `{`, `(`, or end-of-line) after the receiver. The
// terminator rules out uses like `return receiver->m()` where the preceding
// token is a keyword, not a type.
/**
 * Build the declarator pattern for one receiver. Capture group 1 is the type
 * text written immediately before the (already regex-escaped) receiver name;
 * a lookahead requires a declarator terminator or end of input after it.
 */
function buildDeclaratorRegex(escapedReceiver: string): RegExp {
  const typeCapture = '([A-Za-z_][\\w:]*(?:\\s*<[^;=(){}]+>)?(?:\\s*[*&]+)?)';
  const terminatorLookahead = '(?=[;=,)\\[{(]|$)';
  return new RegExp(`${typeCapture}\\s*\\b${escapedReceiver}\\b\\s*${terminatorLookahead}`);
}

/**
 * Infer the declared type of `receiverName` at the reference site.
 *
 * Scans the referencing file upward from the reference's line for a declarator
 * of the receiver. An `auto` declaration is resolved through its initializer;
 * when that fails the upward scan continues. If the file yields nothing, the
 * sibling header (`.h`, `.hpp`, `.hxx` swapped in for a `.c`/`.cc`/`.cpp`/
 * `.cxx` extension) is scanned top to bottom, skipping `auto` declarations.
 *
 * @param depth recursion depth, forwarded to the `auto` initializer resolver.
 * @returns the normalized type name, or null when none is found.
 */
function inferCppReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth = 0,
): string | null {
  const fileText = context.readFile(ref.filePath);
  if (!fileText) return null;

  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const mentionsReceiver = new RegExp(`\\b${escapedReceiver}\\b`);
  const declarator = buildDeclaratorRegex(escapedReceiver);

  const fileLines = fileText.split(/\r?\n/);
  // Clamp the 1-based reference line into the file's valid index range.
  const startIndex = Math.max(0, Math.min(fileLines.length - 1, ref.line - 1));

  for (let idx = startIndex; idx >= 0; idx -= 1) {
    const text = fileLines[idx];
    if (!text || !mentionsReceiver.test(text)) continue;

    const declared = declarator.exec(text);
    if (!declared) continue;

    const typeName = normalizeCppTypeName(declared[1] ?? '');
    if (!typeName) continue;
    if (typeName !== 'auto') return typeName;

    // `auto x = Foo::instance();` — the declared type is deduced; recover it
    // from the initializer (call return type / construction) (#645).
    const deduced = inferCppAutoInitializerType(text, receiverName, ref, context, depth);
    if (deduced) return deduced;
    // No usable initializer on this line — keep scanning earlier ones.
  }

  // Nothing in the source file: try the matching header(s), deduplicated and
  // excluding the referencing file itself (the case when it is not a C/C++
  // source file and the extension swap is a no-op).
  const headerPaths = ['.h', '.hpp', '.hxx']
    .map((ext) => ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, ext))
    .filter((path, position, all) => all.indexOf(path) === position && path !== ref.filePath);

  for (const headerPath of headerPaths) {
    if (!context.fileExists(headerPath)) continue;
    const headerText = context.readFile(headerPath);
    if (!headerText) continue;

    for (const headerLine of headerText.split(/\r?\n/)) {
      if (!mentionsReceiver.test(headerLine)) continue;
      const declared = declarator.exec(headerLine);
      if (!declared) continue;
      const typeName = normalizeCppTypeName(declared[1] ?? '');
      if (typeName && typeName !== 'auto') return typeName;
    }
  }

  return null;
}

/**
 * Last `::`-separated segment of a (possibly namespace-qualified) C++ name;
 * the input itself when it has no non-empty segment.
 */
function cppLastSegment(name: string): string {
  const segments = name.split('::').filter(Boolean);
  return segments.pop() ?? name;
}

/**
 * Return type captured at extraction for `Class::method` (or a free function),
 * read off the indexed node's `returnType` — used by the C++ (#645) and PHP
 * (#608) chained-call resolvers. Language-filtered. Null when not indexed or no
 * return type was recorded (a `void`/primitive return).
 */
function lookupCalleeReturnType(
  callee: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  let methodName = callee;
  let className = '';
  if (callee.includes('::')) {
    const segments = callee.split('::').filter(Boolean);
    methodName = segments[segments.length - 1] ?? callee;
    className = segments.slice(0, -1).join('::');
  }

  const typedCallables = context.getNodesByName(methodName).filter((node) => {
    const isCallable = node.kind === 'method' || node.kind === 'function';
    return isCallable && node.language === ref.language && !!node.returnType;
  });

  if (!className) {
    // Unqualified callee: only a free function qualifies.
    const freeFunction = typedCallables.find((node) => node.kind === 'function');
    return freeFunction?.returnType ?? null;
  }

  const wanted = `${className}::${methodName}`;
  // The call site may name the class with MORE namespace qualification than
  // the stored node (`details::registry::instance` at the call vs
  // `registry::instance` on the node — the receiver type only carries the
  // immediate class), or LESS. Accept an exact match or either being a
  // namespace-suffix of the other; the shared `<class>::<method>` tail keeps
  // it specific.
  const owner = typedCallables.find(
    (node) =>
      node.qualifiedName === wanted ||
      node.qualifiedName.endsWith(`::${wanted}`) ||
      wanted.endsWith(`::${node.qualifiedName}`),
  );
  return owner?.returnType ?? null;
}

/** Does the graph contain a class/struct named `name`'s last segment? */
function cppClassExists(name: string, ref: UnresolvedRef, context: ResolutionContext): boolean {
  for (const node of context.getNodesByName(cppLastSegment(name))) {
    const isType = node.kind === 'class' || node.kind === 'struct';
    if (isType && node.language === ref.language) return true;
  }
  return false;
}

/**
 * Infer the class produced by a C++ call/construction expression, using return
 * types captured at extraction (#645). Handles, in order:
 *  - `make_unique<T>()` / `make_shared<T>()` → T
 *  - single-level member call `recv.method()` → recv's type, then method's return
 *  - `Class::method()` / free `func()` → the callee's recorded return type
 *  - direct construction `Type()` / `ns::Type()` → Type
 * Returns null when undeterminable. Callers MUST still validate the outer method
 * exists on the result before creating an edge, so a wrong guess stays silent.
*/
function resolveCppCallResultType(
  inner: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth = 0,
): string | null {
  if (depth > 3) return null; // guard against pathological mutual recursion
  const expr = inner.trim();

  // `make_unique<T>` / `make_shared<T>`: capture the whole (possibly
  // namespace-qualified) template argument and keep its last segment, so
  // `make_unique<ns::Widget>` yields `Widget` rather than the namespace `ns`.
  const make = expr.match(/(?:^|::)(?:make_unique|make_shared)\s*<\s*([A-Za-z_][\w:]*)/);
  if (make) return make[1] ? cppLastSegment(make[1]) : null;

  // Single-level member call `recv.method` (the `manager.view().render()` shape).
  const dotIdx = expr.lastIndexOf('.');
  if (dotIdx > 0) {
    const recv = expr.slice(0, dotIdx);
    const method = expr.slice(dotIdx + 1);
    if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null; // single level only
    const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
    if (!recvType) return null;
    return lookupCalleeReturnType(`${recvType}::${method}`, ref, context);
  }

  const ret = lookupCalleeReturnType(expr, ref, context);
  if (ret) return ret;

  // Direct construction — the callee itself names a class/struct.
  if (cppClassExists(expr, ref, context)) return cppLastSegment(expr);
  return null;
}

/**
 * Recover the type of an `auto`-declared local from its initializer on the
 * declaration line — `auto x = Foo::instance();`, `auto w = make_unique<W>();`,
 * `auto p = new W();`, `auto w = Widget();` (#645).
 *
 * @param line the source line holding the declaration.
 * @param receiverName the declared variable's name.
 * @param depth recursion depth, forwarded (+1) to the call-result resolver.
 * @returns the inferred class name, or null when the initializer is neither a
 *   `new` expression nor a call/construction that can be resolved.
 */
function inferCppAutoInitializerType(
  line: string,
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth: number,
): string | null {
  const escaped = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Everything after `name =` up to the first `;` is the initializer.
  const m = line.match(new RegExp(`\\b${escaped}\\b\\s*=\\s*([^;]+)`));
  if (!m || !m[1]) return null;
  const init = m[1].trim();

  const neu = init.match(/^new\s+([A-Za-z_][\w:]*)/);
  if (neu && neu[1]) return cppLastSegment(neu[1]);

  // A call or construction: `Foo(...)`, `A::b(...)`, `make_unique<T>(...)`.
  const call = init.match(/^([A-Za-z_][\w:]*(?:\s*<[^>;]*>)?)\s*\(/);
  if (call && call[1]) {
    return resolveCppCallResultType(call[1].replace(/\s+/g, ''), ref, context, depth + 1);
  }
  return null;
}

/**
 * Resolve a C++ chained call whose receiver is itself a call — encoded by the
 * extractor as `<inner>().<method>` (#645). The receiver's type is what
 * the inner call returns; the outer method is then resolved and VALIDATED on it
 * (resolveMethodOnType requires `cls::method` to exist), so a wrong inference
 * produces no edge rather than a wrong one.
 */
export function matchCppCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  if (!m || !m[1] || !m[2]) return null;
  const cls = resolveCppCallResultType(m[1], ref, context);
  if (!cls) return null;
  return resolveMethodOnType(cls, m[2], ref, context, 0.85, 'instance-method');
}

/**
 * Resolve a `::`-scoped factory chain whose receiver is a scoped/static call —
 * PHP `Cls::for($x)->method()` (#608, the per-credential Laravel client idiom) or
 * Rust `Foo::new().bar()` (an associated-function call) — both encoded by the
 * extractor as `Cls::factory().method`. The receiver's type is what `Cls::factory`
 * returns: a `self` marker (PHP `: self`/`: static`, Rust `-> Self`) resolves to
 * the factory's own type, a concrete return type to that type. The outer method is
 * then resolved and VALIDATED on it (resolveMethodOnType requires the method to
 * exist on the type or a supertype it conforms to), so a wrong inference yields no
 * edge rather than a wrong one. Shared by the `::`-receiver languages (PHP, Rust).
*/
export function matchScopedCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  // Split `<factory call>().<outer method>`.
  const chain = /^(.+)\(\)\.(\w+)$/.exec(ref.referenceName);
  const factoryCall = chain?.[1];
  const outerMethod = chain?.[2];
  if (!factoryCall || !outerMethod) return null;

  // Only static-factory (`Cls::method`) chains are handled here.
  const scopeEnd = factoryCall.lastIndexOf('::');
  if (scopeEnd < 0) return null;

  const returned = lookupCalleeReturnType(factoryCall, ref, context);
  if (!returned) return null;

  // `self` (the extractor's marker for self/static/$this) → the factory's class.
  const receiverType = returned === 'self' ? factoryCall.slice(0, scopeEnd) : returned;
  return resolveMethodOnType(receiverType, outerMethod, ref, context, 0.85, 'instance-method');
}

/**
 * Languages in which a bare capitalized call `Foo(args)` yields a `Foo`, so the
 * receiver of `Foo(args).method()` has type `Foo`:
 *  - Kotlin, Swift, Dart: the call constructs the class.
 *  - Scala: `Foo(args)` is a case-class / companion `apply`, which
 *    conventionally returns `Foo`. resolveMethodOnType validates, so a
 *    non-conventional `apply` returning another type simply yields no edge
 *    rather than a wrong one.
 *  - Pascal/Delphi: `TFoo(x)` is a TYPECAST whose result is a `TFoo`, so
 *    `TFoo(x).method()` resolves the method on `TFoo` — same shape, same
 *    validation.
 * Java/C# are excluded: they need `new`, so a bare `Foo()` there is a method
 * call, not construction.
 */
const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift', 'scala', 'dart', 'pascal']);

/**
 * Resolve a dotted chained call whose receiver is a static factory / fluent call —
 * `Foo.getInstance().bar()`, encoded by the extractor as `Foo.getInstance().bar`
 * (#645/#608 mechanism). The receiver's type is what `Foo.getInstance` returns
 * (its declared return type); the outer method is then resolved and VALIDATED on
 * it (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
 * yields no edge rather than a wrong one (e.g. a same-named `bar()` on an
 * unrelated class is never matched).
/**
 * Resolve a dotted chained call whose receiver is itself a call —
 * `Foo.getInstance().bar()`, encoded by the extractor as `Foo.getInstance().bar`
 * (#645/#608 mechanism).
 *
 * The receiver's type is what the inner call returns (its declared return
 * type). The outer method is then resolved and VALIDATED on that type
 * (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
 * yields no edge rather than a wrong one — a same-named `bar()` on an unrelated
 * class is never matched.
 *
 * Three receiver shapes are handled:
 *  1. `Receiver.factory().method` — type = return type of `Receiver::factory`.
 *     Objective-C and Pascal fall back to the receiver class itself when no
 *     return type was captured.
 *  2. `Foo().method` with a capitalized `Foo` in a CONSTRUCTS_VIA_BARE_CALL
 *     language — type = `Foo`.
 *  3. Go `New().method` — type = return type of the package-level `New`, with a
 *     bare-name fallback when that return type is unknown.
 *
 * @param ref - The unresolved reference (`referenceName`, `language`, and the
 *   file info used for import lookup).
 * @param context - Resolution context, handed through to the lookup helpers.
 * @returns The resolved reference, or null when the name is not a call chain or
 *   no validated target was found.
 */
export function matchDottedCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = /^(.+)\(\)\.(\w+)$/.exec(ref.referenceName);
  const inner = chain?.[1]; // `Foo.getInstance`
  const method = chain?.[2]; // `bar`
  if (!inner || !method) return null;

  // Resolve `method` on `typeName`, letting the caller file's import of that
  // type pick between same-named classes.
  const resolveOn = (typeName: string, confidence: number) =>
    resolveMethodOnType(
      typeName,
      method,
      ref,
      context,
      confidence,
      'instance-method',
      importedFqnOf(typeName, ref, context),
    );

  const lastDot = inner.lastIndexOf('.');

  if (lastDot > 0) {
    // Factory/fluent receiver `Receiver.factory(args).method()`: the receiver's
    // type is what `Receiver.factory` returns (its declared return type).
    const factoryClass = inner.slice(0, lastDot).split('.').pop(); // simple class name
    const factoryMethod = inner.slice(lastDot + 1);
    if (!factoryClass || !factoryMethod) return null;

    const declaredReturn = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
    if (declaredReturn) {
      // A concrete return type was captured: resolve on it and nothing else.
      // If the method is absent there, no edge is emitted (absent-method
      // safety) — the class fallbacks below must NOT fire in that case.
      return resolveOn(declaredReturn, 0.85);
    }

    // No return type captured. Two languages have a convention that makes the
    // receiver class itself the right answer:
    //  - Objective-C: a class-message factory (`[X alloc]`, `[X new]`,
    //    `[X sharedFoo]`) returns an instance of the receiver class `X`
    //    (`instancetype`); `alloc`/`new` may not be user-defined nodes at all.
    //    This resolves `[[X alloc] init]` and singleton chains.
    //  - Pascal/Delphi: the extractor only re-encodes `TFoo`/`IFoo`-prefixed
    //    chains, and an uncaptured return type means a constructor
    //    (`TFileMem.Create()`, which has no `: TBar` annotation but returns its
    //    own class) or an unannotated function.
    // resolveMethodOnType validates against the class (and its supertypes), so
    // a class whose method lives elsewhere yields no edge.
    const receiverIsItsOwnClass =
      (ref.language === 'objc' && /^[A-Z]/.test(factoryClass)) ||
      (ref.language === 'pascal' && /^[TI]/.test(factoryClass));
    return receiverIsItsOwnClass ? resolveOn(factoryClass, 0.8) : null;
  }

  if (ref.language !== 'go') {
    // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a
    // bare, capitalized inner yields the class itself in the languages listed
    // in CONSTRUCTS_VIA_BARE_CALL. Elsewhere (Java/C#) a bare `Foo()` is a
    // method call, and a lowercase bare inner is a top-level
    // `factory().method()` whose type we can't recover — bail in both cases.
    const constructsClass =
      CONSTRUCTS_VIA_BARE_CALL.has(ref.language) && /^[A-Z]/.test(inner);
    return constructsClass ? resolveOn(inner, 0.85) : null;
  }

  // Go: bare package-level factory FUNCTION `New().method()` — the receiver's
  // type is what `New` returns; resolve the method on that.
  const goReturn = lookupCalleeReturnType(inner, ref, context);
  if (goReturn) {
    return resolveOn(goReturn, 0.85);
  }

  // `inner` isn't a function with a captured return type — typically a
  // package-level VARIABLE holding a function value (e.g. gin's `engine()`),
  // whose type we can't recover. Fall back to bare-name resolution of the
  // method so we don't DROP an edge the un-re-encoded bare path would have
  // found. (When `inner` IS a real factory but the method doesn't exist on its
  // return type, we already returned above — absent-method safety holds.)
  //
  // CRITICAL: resolve the TARGET via a synthetic bare-name ref, but return the
  // match tied to the ORIGINAL `ref` (referenceName `inner().method`). The
  // batched resolver (resolveAndPersistBatched) reads unresolved rows from
  // offset 0 every pass and relies on deleteSpecificResolvedReferences — keyed
  // on referenceName — to clear each resolved row so the batch empties. If the
  // synthetic ref's bare `method` were propagated as `.original`, the delete
  // would never match the stored `inner().method` row, the batch would never
  // drain, and the loop would re-resolve + re-insert forever (a runaway that
  // grew gin's graph to 5M edges / 1.4 GB before this fix).
  const bareRef = { ...ref, referenceName: method };
  const bareMatch = matchByExactName(bareRef, context) ?? matchFuzzy(bareRef, context);
  if (!bareMatch) return null;
  return { ...bareMatch, original: ref };
}
/**
 * When several classes share a simple type name, the caller file's import of
 * that type is the only signal that names WHICH one (#314).
 *
 * @param typeName - Simple (local) type name to look up.
 * @param ref - Reference whose file and language select the import table.
 * @param context - Resolution context providing `getImportMappings`.
 * @returns The `source` of the import whose `localName` equals `typeName`, or
 *   undefined when the file has no such import.
 */
function importedFqnOf(
  typeName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | undefined {
  const imports = context.getImportMappings(ref.filePath, ref.language);
  return imports.find((i) => i.localName === typeName)?.source;
}

/**
 * Java/Kotlin: infer a receiver's declared type by walking field declarations
 * in the class enclosing the call site. The field's `signature` has the type
 * text followed by the field name (set by tree-sitter.ts extractField), so the
 * type is pulled from the text before the name. Handles Spring
 * `@Resource UserBO userbo;` / `@Autowired private UserService userService;`
 * where the receiver field name doesn't match the class name by convention.
 *
 * @param receiverName - Name of the receiver (the field being called through).
 * @param ref - The call-site reference (`filePath`, `language`, `line`).
 * @param context - Resolution context providing `getNodesInFile`.
 * @returns The bare type name (generic arguments — including nested ones —
 *   array/varargs markers and dotted package stripped), or null when there is
 *   no enclosing class, no matching field, no usable signature, or the type
 *   does not start with an uppercase letter (primitives).
 */
function inferJavaFieldReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const inFile = context.getNodesInFile(ref.filePath);
  if (inFile.length === 0) return null;

  // Find the class enclosing the call line (tightest match by latest start).
  let enclosing: Node | null = null;
  for (const n of inFile) {
    if (n.kind !== 'class' && n.kind !== 'interface') continue;
    if (n.language !== ref.language) continue;
    const end = n.endLine ?? n.startLine;
    if (n.startLine <= ref.line && end >= ref.line) {
      if (!enclosing || n.startLine >= enclosing.startLine) enclosing = n;
    }
  }
  if (!enclosing) return null;

  // Copy the bounds into constants so the predicate below does not depend on
  // narrowing of the mutable `enclosing` binding.
  const enclosingStart = enclosing.startLine;
  const enclosingEnd = enclosing.endLine ?? enclosing.startLine;
  const field = inFile.find(
    (n) =>
      n.kind === 'field' &&
      n.name === receiverName &&
      n.language === ref.language &&
      n.startLine >= enclosingStart &&
      (n.endLine ?? n.startLine) <= enclosingEnd,
  );
  if (!field || !field.signature) return null;

  // The type is everything before the (last occurrence of the) field name. If
  // the name is not in the signature at all there is no reliable split point —
  // slicing at -1 would just chop the final character — so give up.
  const nameIdx = field.signature.lastIndexOf(field.name);
  if (nameIdx < 0) return null;
  const typeRaw = field.signature.slice(0, nameIdx).trim();
  if (!typeRaw) return null;

  // Drop generic arguments by bracket depth so NESTED generics
  // (`Map<String, List<Foo>>`) leave no stray `>` behind.
  let depth = 0;
  let typeNoGenerics = '';
  for (const ch of typeRaw) {
    if (ch === '<') {
      depth++;
    } else if (ch === '>' && depth > 0) {
      depth--;
    } else if (depth === 0) {
      typeNoGenerics += ch;
    }
  }

  // Drop array (`[]`) and trailing varargs (`...`) markers.
  const typeNoArray = typeNoGenerics
    .trim()
    .replace(/\[\s*\]/g, '')
    .replace(/\.\.\.$/, '')
    .trim();

  // Modifiers/annotations and package qualifiers precede the simple name, so
  // the last dot/space-separated token is the type.
  const parts = typeNoArray.split(/[.\s]+/).filter(Boolean);
  const lastPart = parts[parts.length - 1];
  if (!lastPart) return null;
  if (!/^[A-Z]/.test(lastPart)) return null; // primitives / lowercase → skip
  return lastPart;
}

// ── Local-variable receiver-type inference (#1108) ──────────────────────────
//
// Instance calls through a local variable (`const lg = new Logger(); lg.log()`)
// only resolved in C++ before this — no other language could learn the
// receiver's type. Local variables are not indexed as nodes (node-explosion),
// so, like the C++ inferrer, we read the enclosing function's source and match
// the receiver's declaration/initializer to recover its type. The type is then
// handed to resolveMethodOnType, which VALIDATES that the type actually
// declares the method, so a mis-inference produces NO edge — the safety net
// that lets the patterns stay simple. C++ keeps its dedicated inferrer (header
// scan + `auto`); this covers every other language.

/** Tokens a loose pattern might capture that are never a user-defined type. */
const NON_TYPE_RECEIVER_TOKENS = new Set([
  'this', 'self', 'super', 'new', 'return', 'await', 'yield', 'typeof',
  'null', 'nil', 'None', 'true', 'false', 'True', 'False', 'undefined',
]);
/**
 * Normalize a captured type expression to a simple type name: drop generic
 * args (including nested ones) and pointer/ref markers, take the last
 * `.`/`::`-qualified segment, and reject obvious non-types.
 *
 * @param raw - The type text captured from source.
 * @returns The simple type name, or null when nothing is left or the segment
 *   is one of NON_TYPE_RECEIVER_TOKENS.
 */
function normalizeInferredTypeName(raw: string): string | null {
  // Strip generic arguments by bracket depth; a single `<[^>]*>` replace would
  // turn `Map<K, List<V>>` into `Map>`.
  let depth = 0;
  let withoutGenerics = '';
  for (const ch of raw) {
    if (ch === '<') {
      depth++;
    } else if (ch === '>' && depth > 0) {
      depth--;
    } else if (depth === 0) {
      withoutGenerics += ch;
    }
  }
  const cleaned = withoutGenerics.replace(/[&*]/g, '').trim();
  const seg = cleaned.split(/[.:]+/).filter(Boolean).pop();
  if (!seg) return null;
  if (NON_TYPE_RECEIVER_TOKENS.has(seg)) return null;
  return seg;
}

/**
 * Per-language patterns that recover a local variable's (or typed parameter's)
 * type from its declaration/initializer. Each regex captures the type in group
 * 1. Ordered most-specific first. PascalCase is required in the capture where
 * the language convention allows, as a cheap false-positive guard on top of
 * resolveMethodOnType's validation.
 *
 * @param language - Language of the file being scanned.
 * @param r - The receiver name, ALREADY regex-escaped by the caller.
 * @returns The patterns to try in order; empty for unsupported languages.
 */
function localReceiverTypePatterns(language: Language, r: string): RegExp[] {
  switch (language) {
    case 'typescript':
    case 'javascript':
    case 'tsx':
    case 'jsx':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_$][\\w.$]*)`), // = new Logger()
        new RegExp(`\\b(?:const|let|var)\\s+${r}\\s*:\\s*([A-Z][\\w.$]*)`), // lg: Logger
      ];
    case 'python':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // lg = Logger(...)
        new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // lg: Logger (PEP 526)
      ];
    case 'java':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_][\\w.]*)`), // = new Logger()
        new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;,)]`), // Logger lg; / param
      ];
    case 'kotlin':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // val lg = Logger(...)
        new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // val lg: Logger / param
      ];
    case 'csharp':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_][\\w.]*)`), // = new Logger()
        new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;,)]`), // Logger lg; / param
      ];
    case 'swift':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // let lg = Logger(...)
        new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // let lg: Logger / param
      ];
    case 'rust':
      return [
        // let lg = Logger::new() / Logger{} / Logger, with an optional
        // `: Type` annotation. The `\s*` before `=` is required: without it
        // the unannotated, conventionally spaced `let lg = …` never matched.
        new RegExp(`\\blet\\s+(?:mut\\s+)?${r}\\b(?:\\s*:[^=]+)?\\s*=\\s*&?(?:mut\\s+)?([A-Z][\\w]*)`),
        new RegExp(`\\blet\\s+(?:mut\\s+)?${r}\\s*:\\s*&?(?:mut\\s+)?([A-Z][\\w]*)`), // let lg: Logger
      ];
    case 'go':
      return [
        new RegExp(`\\b${r}\\b\\s*:=\\s*&?([A-Za-z_][\\w.]*)\\s*{`), // lg := Logger{} / &Logger{}
        new RegExp(`\\bvar\\s+${r}\\s+\\*?([A-Za-z_][\\w.]*)`), // var lg Logger / *Logger
      ];
    case 'ruby':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w:]*)\\.new\\b`), // lg = Logger.new
      ];
    case 'scala':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*(?:new\\s+)?([A-Z][\\w.]*)`), // val lg = new Logger / Logger(...)
        new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // val lg: Logger / param
      ];
    case 'dart':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // var lg = Logger(...)
        new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;]`), // Logger lg = ...
      ];
    case 'php':
      return [
        new RegExp(`\\$?${r}\\b\\s*=\\s*new\\s+([A-Za-z_\\\\][\\w\\\\]*)`), // $lg = new Logger()
      ];
    default:
      return [];
  }
}

/**
 * 1-based start line of the tightest function/method enclosing the call.
 *
 * Only same-language `function`/`method` nodes whose line span contains
 * `ref.line` are considered; the one starting latest wins.
 *
 * @returns That start line, or 1 when no function/method encloses the call.
 */
function enclosingScopeStartLine(ref: UnresolvedRef, context: ResolutionContext): number {
  let start = 1;
  for (const n of context.getNodesInFile(ref.filePath)) {
    if (n.kind !== 'function' && n.kind !== 'method') continue;
    if (n.language !== ref.language) continue;
    const end = n.endLine ?? n.startLine;
    if (n.startLine <= ref.line && end >= ref.line && n.startLine >= start) {
      start = n.startLine;
    }
  }
  return start;
}
/**
 * Infer a receiver's type from its local declaration/initializer in the
 * enclosing function body. Language-dispatched: returns null for languages
 * without patterns, when the file cannot be read, or when no declaration is
 * found. The scan is bounded to the enclosing scope so a same-named variable
 * in another function can't leak in.
 *
 * @param receiverName - Raw receiver text (regex-escaped here before use).
 * @param ref - The call-site reference (`filePath`, `language`, `line`).
 * @param context - Resolution context providing `readFile` and file nodes.
 * @returns The normalized type name, or null.
 */
function inferLocalReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const patterns = localReceiverTypePatterns(ref.language, escapedReceiver);
  if (patterns.length === 0) return null;

  const source = context.readFile(ref.filePath);
  if (!source) return null;

  const lines = source.split(/\r?\n/);
  let cursor = Math.max(0, Math.min(lines.length - 1, ref.line - 1));
  const scopeTop = Math.max(0, enclosingScopeStartLine(ref, context) - 1);

  // Nearest declaration wins: walk upward from the call to the scope start.
  while (cursor >= scopeTop) {
    const text = lines[cursor];
    cursor--;
    if (!text) continue;
    for (const pattern of patterns) {
      const captured = pattern.exec(text)?.[1];
      if (!captured) continue;
      const typeName = normalizeInferredTypeName(captured);
      if (typeName) return typeName;
    }
  }
  return null;
}

/**
 * Find `methodName` on a class-like node (class/struct/interface) named
 * `className` in the reference's language. Candidates in the call site's own
 * file are tried first, so a class name that exists in several files resolves
 * locally instead of to the first-indexed duplicate (#1079).
 *
 * @returns The method node, or undefined when no such class declares it.
 */
function lookupMethodOnNamedClass(
  className: string,
  methodName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
) {
  const ordered = preferCallSiteFile(context.getNodesByName(className), ref.filePath);
  for (const classNode of ordered) {
    const classLike =
      classNode.kind === 'class' ||
      classNode.kind === 'struct' ||
      classNode.kind === 'interface';
    if (!classLike) continue;
    // Skip cross-language class matches.
    if (classNode.language !== ref.language) continue;

    const found = context
      .getNodesInFile(classNode.filePath)
      .find(
        (n) =>
          n.kind === 'method' &&
          n.name === methodName &&
          n.qualifiedName.includes(classNode.name),
      );
    if (found) return found;
  }
  return undefined;
}

/**
 * Try to resolve a method call written as `obj.method` or `Class::method`.
 *
 * Steps, in order (first hit wins):
 *  - Typed receiver (dot form only): infer the receiver's type from its local
 *    declaration (C++ uses its dedicated inferrer) and resolve the method on
 *    that type (#1108). For Java/Kotlin, also try the enclosing class's field
 *    declarations (Spring `@Resource`/`@Autowired` injection). Confidence 0.9.
 *  - Strategy 1: the receiver names a class directly. Confidence 0.85.
 *  - Strategy 2: the receiver, capitalized, names a class
 *    (`permissionEngine` → `PermissionEngine`). Confidence 0.8.
 *  - Strategy 3: name-match the method across the codebase — a sole
 *    same-language candidate (0.7), or the candidate whose qualified name
 *    shares the most words with the receiver (0.65). Declines when the name
 *    has more than AMBIGUOUS_NAME_CEILING definitions (#999).
 *
 * @param ref - The unresolved reference.
 * @param context - Resolution context.
 * @returns The resolved reference, or null when nothing matched.
 */
export function matchMethodCall(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // The method part allows trailing `:` keywords so Objective-C selectors
  // resolve (`SDImageCache.storeImage:`, `obj.setX:y:`); colons never appear in
  // other languages' method refs, so this is a no-op for them. The receiver
  // allows dots (`builder.Services.AddCoreServices`) so a CHAINED call resolves
  // by its last segment — without this a multi-dot extension-method call
  // (C# DI `builder.Services.AddCoreServices()`, `Guard.Against.X()`) matched
  // no pattern and never resolved.
  const dotMatch = /^([\w.]+)\.(\w+:?(?:\w+:)*)$/.exec(ref.referenceName);
  const parsed = dotMatch ?? /^(\w+)::(\w+)$/.exec(ref.referenceName);
  if (!parsed) return null;

  const receiver = parsed[1];
  const methodName = parsed[2];
  // Both groups are non-empty whenever either pattern matches; this guard only
  // narrows the types.
  if (!receiver || !methodName) return null;

  if (dotMatch) {
    const isJvm = ref.language === 'java' || ref.language === 'kotlin';

    // resolveMethodOnType validates that the method exists on the inferred
    // type, so a mis-inference produces no edge.
    const localType =
      ref.language === 'cpp'
        ? inferCppReceiverType(receiver, ref, context)
        : inferLocalReceiverType(receiver, ref, context);
    if (localType) {
      // Java/Kotlin: when two classes share the simple name, the file's import
      // pins WHICH one (#314). Other languages disambiguate by call-site file.
      const importedFqn = isJvm ? importedFqnOf(localType, ref, context) : undefined;
      const viaLocal = resolveMethodOnType(
        localType,
        methodName,
        ref,
        context,
        0.9,
        'instance-method',
        importedFqn,
      );
      if (viaLocal) return viaLocal;
    }

    // Java/Kotlin: the receiver may be a field whose name doesn't match its
    // type (`userbo` → class `UserBO`). Use the field's declared type.
    if (isJvm) {
      const fieldType = inferJavaFieldReceiverType(receiver, ref, context);
      if (fieldType) {
        const viaField = resolveMethodOnType(
          fieldType,
          methodName,
          ref,
          context,
          0.9,
          'instance-method',
          importedFqnOf(fieldType, ref, context),
        );
        if (viaField) return viaField;
      }
    }
  }

  // Strategy 1: the receiver is itself a class name (`Logger.log()` /
  // `Logger::log()`).
  const onNamedClass = lookupMethodOnNamedClass(receiver, methodName, ref, context);
  if (onNamedClass) {
    return {
      original: ref,
      targetNodeId: onNamedClass.id,
      confidence: 0.85,
      resolvedBy: 'qualified-name',
    };
  }

  // Strategy 2: instance-variable receiver — try its capitalized form as a
  // class name, e.g. "permissionEngine" → "PermissionEngine".
  const capitalizedReceiver = receiver.charAt(0).toUpperCase() + receiver.slice(1);
  if (capitalizedReceiver !== receiver) {
    const onCapitalized = lookupMethodOnNamedClass(capitalizedReceiver, methodName, ref, context);
    if (onCapitalized) {
      return {
        original: ref,
        targetNodeId: onCapitalized.id,
        confidence: 0.8,
        resolvedBy: 'instance-method',
      };
    }
  }

  // Strategy 3: find methods by name across the codebase and match by receiver
  // name similarity with the containing class. Handles abbreviated variable
  // names like permissionEngine → PermissionRuleEngine.
  const sameNamed = context.getNodesByName(methodName);

  // Ubiquitous-method ceiling (#999): a method name re-declared across a
  // vendored theme/SDK yields K candidates that receiver-word overlap can't
  // reliably disambiguate — and filtering + scoring all K per call is the
  // O(K²) cost that wedged "Resolving refs". Bail before the O(K) work;
  // Strategy 1/2 already had their precise shot above.
  if (sameNamed.length > AMBIGUOUS_NAME_CEILING) return null;

  const methods = sameNamed.filter((n) => n.kind === 'method' && n.name === methodName);
  const sameLanguage = methods.filter((m) => m.language === ref.language);
  const pool = sameLanguage.length > 0 ? sameLanguage : methods;

  // A sole candidate is accepted only when it is in the reference's language.
  if (pool.length === 1 && pool[0]!.language === ref.language) {
    return {
      original: ref,
      targetNodeId: pool[0]!.id,
      confidence: 0.7,
      resolvedBy: 'instance-method',
    };
  }

  if (pool.length > 1) {
    // Score = number of receiver words found (case-insensitively) in the
    // candidate's qualified name, +1 for same language.
    const receiverWords = splitCamelCase(receiver).map((w) => w.toLowerCase());
    let best: (typeof pool)[number] | undefined;
    let bestScore = 0;

    // Same-file candidates first, so a score tie (strict `>` keeps the first
    // seen) resolves to the call site's own file rather than the first-indexed
    // duplicate (#1079).
    for (const candidate of preferCallSiteFile(pool, ref.filePath)) {
      const classWords = new Set(
        splitCamelCase(candidate.qualifiedName).map((w) => w.toLowerCase()),
      );
      let score = 0;
      for (const word of receiverWords) {
        if (classWords.has(word)) score++;
      }
      if (candidate.language === ref.language) score += 1;

      if (score > bestScore) {
        bestScore = score;
        best = candidate;
      }
    }

    if (best && bestScore >= 2) {
      return {
        original: ref,
        targetNodeId: best.id,
        confidence: 0.65,
        resolvedBy: 'instance-method',
      };
    }
  }

  return null;
}

/**
 * Split a camelCase or PascalCase string into words. Whitespace, `.`, `_`,
 * `:`, `/` and `\` also act as separators; one-character pieces are dropped.
 */
function splitCamelCase(str: string): string[] {
  const spaced = str
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  const words: string[] = [];
  for (const piece of spaced.split(/[\s._:\/\\]+/)) {
    if (piece.length > 1) words.push(piece);
  }
  return words;
}

/**
 * Compute directory proximity from a pre-split list of directory segments
 * (`filePath1` minus its filename) and a second file path.
 *
 * Split into a pre-split variant because findBestMatch scores every candidate
 * against the SAME `ref.filePath`; re-splitting it per candidate was a hot spot
 * on large repos (#915), so the caller splits it once and passes the segments.
 *
 * @param dir1 - Directory segments of the first path (filename removed).
 * @param filePath2 - The second file path, `/`-separated.
 * @returns 15 points per shared LEADING directory segment, capped at 80.
 */
function pathProximityFromDirs(dir1: string[], filePath2: string): number {
  const dir2 = filePath2.split('/').slice(0, -1); // drop the filename
  const limit = Math.min(dir1.length, dir2.length);
  let shared = 0;
  while (shared < limit && dir1[shared] === dir2[shared]) {
    shared++;
  }
  return Math.min(shared * 15, 80);
}
/**
 * Compute directory proximity between two file paths.
 *
 * @returns The pathProximityFromDirs score for `filePath1`'s directory
 *   segments against `filePath2`.
 */
function computePathProximity(filePath1: string, filePath2: string): number {
  const dirs = filePath1.split('/').slice(0, -1); // drop the filename
  return pathProximityFromDirs(dirs, filePath2);
}

/**
 * Find the best matching node when there are multiple candidates.
 *
 * Each candidate is scored additively:
 *  - same file as the reference: +100
 *  - directory proximity (pathProximityFromDirs)
 *  - same language: +50, otherwise −80
 *  - kind bonus depending on `ref.referenceKind`:
 *      `calls`        → function/method +25
 *      `instantiates` → class/struct/interface +25 (so a function named `Foo`
 *                       in another module can't outscore the actual class)
 *      `decorates`    → function/method +25, class/interface +15 (class
 *                       decorators / annotation interfaces also resolve here)
 *  - exported: +10
 *  - same file: up to +20 for being close to the reference line
 *
 * The highest score wins; ties keep the earlier candidate. A candidate must
 * beat the initial floor of −1 to be chosen at all.
 *
 * @param ref - The unresolved reference being matched.
 * @param candidates - Nodes sharing the referenced name.
 * @param _context - Unused.
 * @returns The winning node, or null when no candidate beats the floor.
 */
function findBestMatch(
  ref: UnresolvedRef,
  candidates: Node[],
  _context: ResolutionContext
): Node | null {
  // Split the ref's path once (it's the same across every candidate) instead of
  // re-splitting it per candidate (#915 hot spot).
  const refDirs = ref.filePath.split('/');
  refDirs.pop();

  // A same-language candidate ALWAYS outscores a cross-language one:
  // same-language scores at least +50, while a cross-language candidate maxes
  // out at +35 (−80 language, +80 proximity, +25 kind, +10 exported; it can
  // never be in the same file). So when any same-language candidate exists the
  // cross-language ones are skipped — same winner, less scoring (#915). When
  // ALL candidates are cross-language, none are skipped.
  const restrictToRefLanguage = candidates.some((c) => c.language === ref.language);

  let winner: Node | null = null;
  let winnerScore = -1;

  for (const candidate of candidates) {
    const sameLanguage = candidate.language === ref.language;
    if (restrictToRefLanguage && !sameLanguage) continue;

    const sameFile = candidate.filePath === ref.filePath;
    const isCallable = candidate.kind === 'function' || candidate.kind === 'method';

    let score = sameFile ? 100 : 0;
    score += pathProximityFromDirs(refDirs, candidate.filePath);
    score += sameLanguage ? 50 : -80;

    switch (ref.referenceKind) {
      case 'calls':
        if (isCallable) score += 25;
        break;
      case 'instantiates':
        if (
          candidate.kind === 'class' ||
          candidate.kind === 'struct' ||
          candidate.kind === 'interface'
        ) {
          score += 25;
        }
        break;
      case 'decorates':
        if (isCallable) {
          score += 25;
        } else if (candidate.kind === 'class' || candidate.kind === 'interface') {
          score += 15;
        }
        break;
      default:
        break;
    }

    if (candidate.isExported) score += 10;

    // Closer line number (within the same file). Added last, as the only
    // possibly fractional term.
    if (sameFile && candidate.startLine) {
      const distance = Math.abs(candidate.startLine - ref.line);
      score += Math.max(0, 20 - distance / 10);
    }

    if (score > winnerScore) {
      winnerScore = score;
      winner = candidate;
    }
  }

  return winner;
}

/**
 * Fuzzy match — last resort with lower confidence.
 *
 * Looks the name up case-insensitively, keeps function/method/class nodes that
 * pass applyLanguageGate, prefers same-language nodes, and resolves ONLY when
 * exactly one candidate remains.
 *
 * @returns A match with confidence 0.5 (same language) or 0.3
 *   (cross-language), or null when zero or several candidates remain.
 */
export function matchFuzzy(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Pre-built lowercase index: a lookup instead of a scan over all nodes.
  const candidates = context.getNodesByLowerName(ref.referenceName.toLowerCase());

  const callableKinds = new Set(['function', 'method', 'class']);
  const gated = applyLanguageGate(
    candidates.filter((n) => callableKinds.has(n.kind)),
    ref,
  );

  const sameLanguage = gated.filter((n) => n.language === ref.language);
  const pool = sameLanguage.length > 0 ? sameLanguage : gated;
  if (pool.length !== 1) return null;

  const only = pool[0]!;
  return {
    original: ref,
    targetNodeId: only.id,
    confidence: only.language !== ref.language ? 0.3 : 0.5,
    resolvedBy: 'fuzzy',
  };
}

/**
 * Run every matching strategy in order of confidence and return the first hit.
 *
 * Order: file path → qualified name → language-specific call-chain matcher →
 * method-call pattern → exact name → fuzzy. Function-as-value references are
 * the exception: they go through matchFunctionRef and nothing else.
 *
 * @returns The first strategy's result, or null when none resolves.
 */
export function matchReference(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Function-as-value refs (#756) resolve ONLY through the dedicated matcher —
  // never the fuzzy/qualified fallthrough below (a wrong callback edge is
  // worse than none).
  if (ref.referenceKind === 'function_ref') {
    return matchFunctionRef(ref, context);
  }

  // 0. File path match (e.g., "snippets/drawer-menu.liquid" → file node)
  // 1. Qualified name match (highest confidence)
  const precise = matchByFilePath(ref, context) ?? matchByQualifiedName(ref, context);
  if (precise) return precise;

  // Chained calls whose receiver is another call. The language groups are
  // disjoint, so at most one chain matcher runs per reference.
  let chained: ResolvedRef | null = null;
  switch (ref.language) {
    // 1b. C/C++ — `Foo::instance().bar()` encoded as `Foo::instance().bar`
    //     (#645): receiver type = what the inner call returns.
    case 'cpp':
    case 'c':
      chained = matchCppCallChain(ref, context);
      break;

    // 1c. `::`-scoped factory chain — PHP `Cls::for($x)->method()` (#608) or
    //     Rust `Foo::new().bar()`, both encoded as `Cls::factory().method`.
    //     Receiver type = the factory's `self` (PHP `: self`/`: static`, Rust
    //     `-> Self`) or concrete return type.
    case 'php':
    case 'rust':
      chained = matchScopedCallChain(ref, context);
      break;

    // 1d. Dotted static-factory / fluent chain — `Foo.getInstance().bar()`
    //     encoded as `Foo.getInstance().bar`, Go's bare-factory
    //     `New().Method()`, Scala's companion factory, Dart's static factory /
    //     factory-constructor, ObjC's `[[Foo create] doIt]` encoded as
    //     `Foo.create().doIt`, and Pascal chains (#645/#608 mechanism).
    case 'java':
    case 'kotlin':
    case 'csharp':
    case 'swift':
    case 'go':
    case 'scala':
    case 'dart':
    case 'objc':
    case 'pascal':
      chained = matchDottedCallChain(ref, context);
      break;

    default:
      break;
  }
  if (chained) return chained;

  // 2. Method call pattern
  // 3. Exact name match
  // 4. Fuzzy match (lowest confidence)
  return (
    matchMethodCall(ref, context) ??
    matchByExactName(ref, context) ??
    matchFuzzy(ref, context)
  );
}