/**
 * Name Matcher
 *
 * Handles symbol name matching for reference resolution.
 */

import { Node } from '../types';
import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';

/**
 * True when `path` ends with `suffix` on a path-segment boundary: either the
 * two are equal, or the character right before the matched tail is `/`
 * (`src/snippets/foo.liquid` ends with `snippets/foo.liquid`, but
 * `src/xsnippets/foo.liquid` does not). A suffix that itself starts with `/`
 * already carries its own boundary.
 */
function endsWithPathSegments(path: string, suffix: string): boolean {
  if (!path.endsWith(suffix)) return false;
  if (path.length === suffix.length || suffix.startsWith('/')) return true;
  return path.charAt(path.length - suffix.length - 1) === '/';
}

/**
 * Try to resolve a path-like reference (e.g., "snippets/drawer-menu.liquid")
 * by matching the filename against file nodes.
 *
 * Resolution order (first hit wins):
 *   1. exact match on `qualifiedName` / `filePath`            → confidence 0.95
 *   2. whole-segment suffix match, closest file preferred     → confidence 0.85
 *   3. the only file node carrying that basename              → confidence 0.7
 *
 * @param ref - The unresolved reference; `referenceName` is the path text.
 * @param context - Lookup access to the indexed nodes.
 * @returns The resolved reference, or `null` when `ref` is not path-like or
 *   no file node matches.
 */
export function matchByFilePath(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Path-like (`a/b.liquid`) OR a bare filename ending in a short extension
  // (`Foo.h` — an Objective-C `#import "Foo.h"`, resolved to the header by
  // basename). A bare ref WITHOUT an extension is a symbol name, not a file, so
  // leave it to the symbol-matching strategies.
  if (!ref.referenceName.includes('/') && !/\.[A-Za-z][A-Za-z0-9]{0,3}$/.test(ref.referenceName)) {
    return null;
  }

  // Extract the filename from the path
  const fileName = ref.referenceName.split('/').pop();
  if (!fileName) return null;

  // Search for file nodes with this name
  const candidates = context.getNodesByName(fileName);
  const fileNodes = candidates.filter(n => n.kind === 'file');

  if (fileNodes.length === 0) return null;

  // Prefer exact path match on qualified_name
  const exactMatch = fileNodes.find(
    n => n.qualifiedName === ref.referenceName || n.filePath === ref.referenceName
  );
  if (exactMatch) {
    return {
      original: ref,
      targetNodeId: exactMatch.id,
      confidence: 0.95,
      resolvedBy: 'file-path',
    };
  }

  // Fall back to suffix match (e.g., ref="snippets/foo.liquid" matches
  // "src/snippets/foo.liquid"). The suffix must start on a directory boundary,
  // so "src/xsnippets/foo.liquid" is NOT a match for "snippets/foo.liquid".
  // When several files share the basename — a `#include "RNCAsyncStorage.h"`
  // with a same-named header on another platform (windows/code/ vs apple/) —
  // prefer the one in the includer's own directory, then by directory
  // proximity / same language family. A C/C++ include (and any bare-filename
  // import) resolves relative to the including file, not to an arbitrary
  // same-named header elsewhere in the tree.
  const suffixMatches = fileNodes.filter(
    n =>
      endsWithPathSegments(n.qualifiedName, ref.referenceName) ||
      endsWithPathSegments(n.filePath, ref.referenceName)
  );
  if (suffixMatches.length > 0) {
    return {
      original: ref,
      targetNodeId: pickClosestFileNode(suffixMatches, ref).id,
      confidence: 0.85,
      resolvedBy: 'file-path',
    };
  }

  // If only one file node with this name, use it with lower confidence
  const [onlyFile] = fileNodes;
  if (fileNodes.length === 1 && onlyFile) {
    return {
      original: ref,
      targetNodeId: onlyFile.id,
      confidence: 0.7,
      resolvedBy: 'file-path',
    };
  }

  return null;
}

/**
 * Among several file nodes that all match a bare include/import by basename,
 * pick the one closest to the referencing file: same directory first, then by
 * directory-tree proximity, with the same language family as a tiebreak. A
 * C/C++ `#include "X.h"` (and any bare-filename import) resolves relative to the
 * including file — not to an arbitrary same-named header on another platform.
 *
 * @param candidates - Non-empty list of matching file nodes.
 * @param ref - The reference whose `filePath` / `language` anchor the ranking.
 * @returns The best-ranked candidate (the first one on ties).
 */
function pickClosestFileNode(candidates: Node[], ref: UnresolvedRef): Node {
  // Nothing to rank — skip the scoring pass entirely.
  if (candidates.length === 1) return candidates[0]!;

  const dirOf = (p: string): string => {
    const i = p.lastIndexOf('/');
    return i >= 0 ? p.slice(0, i) : '';
  };
  const refDir = dirOf(ref.filePath);
  const sameDir = candidates.filter((c) => dirOf(c.filePath) === refDir);
  const pool = sameDir.length > 0 ? sameDir : candidates;

  let best = pool[0]!;
  let bestScore = -Infinity;
  for (const c of pool) {
    const score =
      computePathProximity(ref.filePath, c.filePath) +
      (sameLanguageFamily(c.language, ref.language) ? 5 : 0);
    // Strict `>` keeps the earliest candidate on equal scores.
    if (score > bestScore) {
      bestScore = score;
      best = c;
    }
  }
  return best;
}

/**
 * Language families that share a type system / runtime, so a same-language-only
 * reference may still resolve across them (a Kotlin `Foo.BAR` can name a Java
 * `Foo`). Anything not listed forms its own singleton family.
 */
const LANGUAGE_FAMILY: Readonly<Record<string, string>> = {
  java: 'jvm',
  kotlin: 'jvm',
  scala: 'jvm',
  swift: 'apple',
  objc: 'apple',
  typescript: 'web',
  tsx: 'web',
  javascript: 'web',
  jsx: 'web',
  c: 'c',
  cpp: 'c',
  // Razor/Blazor markup names C# types — same family so `@model Foo` /
  // `<Foo />` resolve to their `.cs` class through the cross-family gate.
  csharp: 'dotnet',
  razor: 'dotnet',
};

/**
 * True when `a` and `b` are the same language or belong to the same listed
 * family. Two different unlisted languages are never the same family.
 */
export function sameLanguageFamily(a: string, b: string): boolean {
  if (a === b) return true;
  const fa = LANGUAGE_FAMILY[a];
  return fa !== undefined && fa === LANGUAGE_FAMILY[b];
}

/**
 * True when `lang` belongs to a known multi-language family
 * (jvm/apple/web/c/dotnet). Languages not listed (php, python, go, ruby, rust,
 * dart, …) and config formats (yaml/xml/blade) form their own singleton
 * families and return `false` — used to leave config↔code framework bridges
 * (whose config side is never a known programming-language family) out of the
 * cross-family gate.
 */
export function isKnownLanguageFamily(lang: string): boolean {
  return LANGUAGE_FAMILY[lang] !== undefined;
}

/**
 * True when `a` and `b` are two DIFFERENT *known* language families — the
 * signature of a coincidental cross-language name collision (a TS `import
 * React` matching a Swift `import React`, a C++ `#include "X.h"` matching a
 * same-named ObjC header on another platform). The both-*known* test is
 * deliberately weaker than {@link sameLanguageFamily}'s negation: a
 * single-file-component language that carries its own tag (`vue`/`svelte`)
 * importing a `.ts` module, or any singleton-family language (php/go/ruby/…),
 * returns `false` here and is left alone.
 */
export function crossesKnownFamily(a: string, b: string): boolean {
  return isKnownLanguageFamily(a) && isKnownLanguageFamily(b) && !sameLanguageFamily(a, b);
}

/**
 * Drop cross-language candidates from a name lookup. Two regimes:
 *   - `references` (type-usage): a type named in language X resolves to a
 *     SAME-family type, never a coincidentally same-named symbol in another
 *     language (the Android `BatteryManager` system class vs a JS one). Strict
 *     same-family filter — cross-language communication is `calls`, not refs.
 *   - `imports` (import binding): an `import`/`#include` never crosses two
 *     KNOWN families (TS `import React` ↮ Swift `import React`). Weaker
 *     both-known filter so `.vue`/`.svelte` (own tag) importing `.ts` survives.
 * Every other reference kind is returned unfiltered.
 */
function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
  if (ref.referenceKind === 'references') {
    return candidates.filter((c) => sameLanguageFamily(c.language, ref.language));
  }
  if (ref.referenceKind === 'imports') {
    return candidates.filter((c) => !crossesKnownFamily(c.language, ref.language));
  }
  return candidates;
}

/**
 * Try to resolve a reference by exact name match.
 *
 * A single surviving candidate is accepted directly (0.9, or 0.5 when its
 * language differs from the reference's). With several candidates the choice is
 * delegated to `findBestMatch`, and the confidence depends on how close the
 * chosen node's file is to the referencing file.
 *
 * @returns The resolved reference, or `null` when nothing matches or the
 *   candidates cannot be narrowed down.
 */
export function matchByExactName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const candidates = applyLanguageGate(context.getNodesByName(ref.referenceName), ref);

  if (candidates.length === 0) {
    return null;
  }

  // If only one match, use it — but penalize cross-language matches
  const [only] = candidates;
  if (candidates.length === 1 && only) {
    const isCrossLanguage = only.language !== ref.language;
    return {
      original: ref,
      targetNodeId: only.id,
      confidence: isCrossLanguage ? 0.5 : 0.9,
      resolvedBy: 'exact-match',
    };
  }

  // Multiple matches - try to narrow down
  const bestMatch = findBestMatch(ref, candidates, context);
  if (bestMatch) {
    // Lower confidence when the match is from a distant/unrelated module
    const proximity = computePathProximity(ref.filePath, bestMatch.filePath);
    const confidence = proximity >= 30 ? 0.7 : 0.4;
    return {
      original: ref,
      targetNodeId: bestMatch.id,
      confidence,
      resolvedBy: 'exact-match',
    };
  }

  return null;
}

/**
 * Try to resolve by qualified name.
 *
 * Only references containing `::` or `.` are considered. A unique hit from the
 * qualified-name index wins (0.95); otherwise the last segment is looked up by
 * simple name and the first node whose `qualifiedName` ends with the full
 * reference text is taken (0.85).
 */
export function matchByQualifiedName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Check if the reference name looks qualified (contains :: or .)
  if (!ref.referenceName.includes('::') && !ref.referenceName.includes('.')) {
    return null;
  }

  const candidates = context.getNodesByQualifiedName(ref.referenceName);
  const [unique] = candidates;
  if (candidates.length === 1 && unique) {
    return {
      original: ref,
      targetNodeId: unique.id,
      confidence: 0.95,
      resolvedBy: 'qualified-name',
    };
  }

  // Try partial qualified name match
  const parts = ref.referenceName.split(/[:.]/);
  const lastName = parts[parts.length - 1];
  if (lastName) {
    const partialCandidates = context.getNodesByName(lastName);
    for (const candidate of partialCandidates) {
      if (candidate.qualifiedName.endsWith(ref.referenceName)) {
        return {
          original: ref,
          targetNodeId: candidate.id,
          confidence: 0.85,
          resolvedBy: 'qualified-name',
        };
      }
    }
  }

  return null;
}

/**
 * Resolve `methodName` on the type `typeName`, validating that a method node
 * with a matching `typeName::methodName` qualified name exists in the
 * reference's language. Falls back to the type's supertypes when the type
 * itself does not define the method.
 *
 * @returns A resolved reference carrying the supplied `confidence` /
 *   `resolvedBy`, or `null` when no such method is indexed.
 */
function resolveMethodOnType(
  typeName: string,
  methodName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  confidence: number,
  resolvedBy: ResolvedRef['resolvedBy'],
  /**
   * Optional FQN that identifies WHICH class declaration `typeName`
   * refers to in the caller's file. When multiple candidates share
   * the same qualifiedName (`FooConverter::convert` in both
   * `dao/converter/` and `service/converter/`), the FQN's
   * file-path-suffix picks the right one — the disambiguation
   * signal Java imports carry but the call site doesn't (#314).
   */
  preferredFqn?: string,
  /** Recursion guard for the supertype/conformance walk. */
  depth = 0,
): ResolvedRef | null {
  // Look up methods by name and match by qualifiedName ending in
  // `typeName::methodName`. This works whether the method is defined
  // in-class (`class Foo { int bar() { ... } }`) or out-of-line in a separate
  // file (`int Foo::bar() { ... }` in foo.cpp while class Foo is in foo.hpp).
  // The previous same-file approach missed the latter — the typical C++ layout.
  const methodCandidates = context.getNodesByName(methodName);
  const want = `${typeName}::${methodName}`;
  const matches: Node[] = [];
  for (const m of methodCandidates) {
    if (m.kind !== 'method') continue;
    if (m.language !== ref.language) continue;
    const qn = m.qualifiedName;
    if (qn === want || qn.endsWith(`::${want}`)) {
      matches.push(m);
    }
  }

  if (matches.length === 0) {
    // Conformance fallback: the method may be defined on a supertype `typeName`
    // extends, or on a protocol / trait it conforms to (e.g. a Swift protocol-
    // extension method, a C# default-interface or extension method, a Kotlin
    // extension on a supertype). Walk supertypes transitively (depth-capped) via
    // the resolved implements/extends edges — empty in the first resolution pass,
    // populated in the conformance pass. Still VALIDATED (the method must exist on
    // a supertype), so a wrong inference produces no edge.
    if (depth < 4 && context.getSupertypes) {
      for (const supertype of context.getSupertypes(typeName, ref.language)) {
        const via = resolveMethodOnType(
          supertype,
          methodName,
          ref,
          context,
          confidence,
          resolvedBy,
          preferredFqn,
          depth + 1,
        );
        if (via) return via;
      }
    }
    return null;
  }

  if (matches.length > 1 && preferredFqn) {
    const ext = ref.language === 'kotlin' ? '.kt' : '.java';
    const fqnPath = preferredFqn.replace(/\./g, '/') + ext;
    // The FQN path must line up with whole directory segments of the file
    // path, so package `com.acme` never matches a `.../xcom/acme/...` file.
    const chosen = matches.find((m) =>
      endsWithPathSegments(m.filePath.replace(/\\/g, '/'), fqnPath),
    );
    if (chosen) {
      return {
        original: ref,
        targetNodeId: chosen.id,
        confidence,
        resolvedBy,
      };
    }
  }

  // No FQN hint (or it matched nothing): fall back to the first candidate.
  return {
    original: ref,
    targetNodeId: matches[0]!.id,
    confidence,
    resolvedBy,
  };
}

// C++ keywords/control-flow tokens that can appear right before a receiver
// (e.g. `return ptr->m()`) and must NOT be treated as a type.
const CPP_NON_TYPE_TOKENS = new Set([
  'return', 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
  'break', 'continue', 'goto', 'throw', 'new', 'delete', 'co_await',
  'co_yield', 'co_return', 'static_cast', 'const_cast', 'dynamic_cast',
  'reinterpret_cast', 'sizeof', 'alignof', 'typeid', 'and', 'or', 'not', 'xor',
]);

/**
 * Reduce a raw C++ declarator type (`const std::vector<int>&`, `ns::Foo*`) to
 * its bare, unqualified type name (`vector`, `Foo`).
 *
 * Strips cv/elaborated-type keywords, pointer/reference markers and template
 * argument lists, then keeps the last `::`-separated segment.
 *
 * @param typeName - Type text as captured in front of a declarator.
 * @returns The bare type name, or `null` when nothing is left or the remaining
 *   token is a keyword listed in `CPP_NON_TYPE_TOKENS`.
 */
function normalizeCppTypeName(typeName: string): string | null {
  let stripped = typeName
    .replace(/\b(const|volatile|mutable|typename|class|struct)\b/g, ' ')
    .replace(/[&*]+/g, ' ');

  // Remove template argument lists innermost-first until none are left. A
  // single pass over `map<int, vector<int>>` would stop at the first `>` and
  // leave a stray `>` glued to the type name.
  let previous: string;
  do {
    previous = stripped;
    stripped = stripped.replace(/<[^<>]*>/g, ' ');
  } while (stripped !== previous);

  const normalized = stripped.replace(/\s+/g, ' ').trim();
  if (!normalized) return null;

  const parts = normalized.split(/::/).filter(Boolean);
  const last = parts[parts.length - 1];
  if (!last) return null;
  if (CPP_NON_TYPE_TOKENS.has(last)) return null;
  return last;
}

// Declarator regex: matches `Type receiver`, `Type* receiver`, `Type *receiver`,
// `Type*receiver`, `Type<T> receiver`, etc., REQUIRING a declarator terminator
// (`;`, `=`, `,`, `)`, `[`, `{`, `(`, or end-of-line) after the receiver. The
// terminator rules out uses like `return receiver->m()` where the preceding
// token is a keyword, not a type.
function buildDeclaratorRegex(escapedReceiver: string): RegExp {
  // Captured group 1: the type text — identifier (optionally `::`-qualified),
  // optional template argument list, optional pointer/reference markers.
  const typePart = '([A-Za-z_][\\w:]*(?:\\s*<[^;=(){}]+>)?(?:\\s*[*&]+)?)';
  // The receiver itself, as a whole word, with optional surrounding blanks.
  const receiverPart = `\\s*\\b${escapedReceiver}\\b\\s*`;
  // Lookahead for a declarator terminator or end of input.
  const terminatorPart = '(?=[;=,)\\[{(]|$)';
  return new RegExp(typePart + receiverPart + terminatorPart);
}

/**
 * Infer the declared C++ type of `receiverName` as seen from the call site.
 *
 * Scans the referencing file upwards from the call line for a declarator of the
 * receiver; an `auto` declaration is resolved through its initializer. When the
 * file yields nothing, sibling headers (`.h`, `.hpp`, `.hxx` next to a
 * `.c`/`.cc`/`.cpp`/`.cxx` source) are scanned top-down.
 *
 * @returns The bare type name, or `null` when no declaration is found.
 */
function inferCppReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth = 0,
): string | null {
  const source = context.readFile(ref.filePath);
  if (!source) return null;

  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const mentionsReceiver = new RegExp(`\\b${escapedReceiver}\\b`);
  const declarator = buildDeclaratorRegex(escapedReceiver);

  // Walk backwards from the (clamped) call line to the top of the file.
  const sourceLines = source.split(/\r?\n/);
  const startIndex = Math.max(0, Math.min(sourceLines.length - 1, ref.line - 1));
  for (let idx = startIndex; idx >= 0; idx--) {
    const text = sourceLines[idx];
    if (!text || !mentionsReceiver.test(text)) continue;

    const found = text.match(declarator);
    if (!found) continue;

    const typeName = normalizeCppTypeName(found[1] ?? '');
    if (typeName === 'auto') {
      // `auto x = Foo::instance();` — the declared type is deduced; recover it
      // from the initializer (call return type / construction) (#645).
      const deduced = inferCppAutoInitializerType(text, receiverName, ref, context, depth);
      if (deduced) return deduced;
      // No usable initializer on this line — keep scanning earlier ones.
      continue;
    }
    if (typeName) return typeName;
  }

  // Nothing in the source file: try the headers that sit next to it.
  const headerPaths: string[] = [];
  for (const headerExt of ['.h', '.hpp', '.hxx']) {
    const candidate = ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, headerExt);
    if (candidate !== ref.filePath && !headerPaths.includes(candidate)) {
      headerPaths.push(candidate);
    }
  }

  for (const headerPath of headerPaths) {
    if (!context.fileExists(headerPath)) continue;
    const headerSource = context.readFile(headerPath);
    if (!headerSource) continue;

    for (const text of headerSource.split(/\r?\n/)) {
      if (!mentionsReceiver.test(text)) continue;
      const found = text.match(declarator);
      if (!found) continue;
      const typeName = normalizeCppTypeName(found[1] ?? '');
      if (typeName && typeName !== 'auto') return typeName;
    }
  }

  return null;
}

/**
 * Last `::`-separated segment of a (possibly namespace-qualified) C++ name.
 * Falls back to the input itself when it has no non-empty segment.
 */
function cppLastSegment(name: string): string {
  return name.split('::').filter(Boolean).pop() ?? name;
}

/**
 * Return type captured at extraction for `Class::method` (or a free function),
 * read off the indexed node's `returnType` — used by the C++ (#645) and PHP
 * (#608) chained-call resolvers. Language-filtered. Null when not indexed or no
 * return type was recorded (a `void`/primitive return).
 */
function lookupCalleeReturnType(
  callee: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  let methodName = callee;
  let className: string | null = null;
  if (callee.includes('::')) {
    const segments = callee.split('::').filter(Boolean);
    methodName = segments[segments.length - 1] ?? callee;
    className = segments.slice(0, -1).join('::');
  }

  const isCallable = (n: Node): boolean => n.kind === 'method' || n.kind === 'function';
  const withReturnType = context
    .getNodesByName(methodName)
    .filter((n) => isCallable(n) && n.language === ref.language && !!n.returnType);

  if (!className) {
    // Unqualified callee: only a free function qualifies.
    const freeFunction = withReturnType.find((n) => n.kind === 'function');
    return freeFunction?.returnType ?? null;
  }

  const wanted = `${className}::${methodName}`;
  // The call site may name the class with MORE namespace qualification than
  // the stored node (`details::registry::instance` at the call vs
  // `registry::instance` on the node — the receiver type only carries the
  // immediate class), or LESS. Accept an exact match or either being a
  // namespace-suffix of the other; the shared `class::method` tail keeps
  // it specific.
  const hit = withReturnType.find((n) => {
    const qualified = n.qualifiedName;
    return (
      qualified === wanted ||
      qualified.endsWith(`::${wanted}`) ||
      wanted.endsWith(`::${qualified}`)
    );
  });
  return hit?.returnType ?? null;
}

/** Does the graph contain a class/struct named `name`'s last segment? */
function cppClassExists(name: string, ref: UnresolvedRef, context: ResolutionContext): boolean {
  const simpleName = cppLastSegment(name);
  const sameNamed = context.getNodesByName(simpleName);
  return sameNamed.some(
    (n) => (n.kind === 'class' || n.kind === 'struct') && n.language === ref.language,
  );
}

/**
 * Infer the class produced by a C++ call/construction expression, using return
 * types captured at extraction (#645). Handles, in order:
 *   - `make_unique<T>()` / `make_shared<T>()` → T
 *   - single-level member call `recv.method()` → recv's type, then method's return
 *   - `Class::method()` / free `func()` → the callee's recorded return type
 *   - direct construction `Type()` / `ns::Type()` → Type
 * Returns null when undeterminable. Callers MUST still validate the outer method
 * exists on the result before creating an edge, so a wrong guess stays silent.
*/
function resolveCppCallResultType(
  inner: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth = 0,
): string | null {
  if (depth > 3) return null; // guard against pathological mutual recursion
  const expr = inner.trim();

  // Smart-pointer factories: the template argument IS the produced class. The
  // argument may be namespace-qualified (`make_unique<ns::Widget>`), so keep
  // its last segment rather than the leading namespace.
  const make = expr.match(/(?:^|::)(?:make_unique|make_shared)\s*<\s*([A-Za-z_][\w:]*)/);
  if (make) return make[1] ? cppLastSegment(make[1]) : null;

  // Single-level member call `recv.method` (the `manager.view().render()` shape).
  const dotIdx = expr.lastIndexOf('.');
  if (dotIdx > 0) {
    const recv = expr.slice(0, dotIdx);
    const method = expr.slice(dotIdx + 1);
    if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null; // single level only
    const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
    if (!recvType) return null;
    return lookupCalleeReturnType(`${recvType}::${method}`, ref, context);
  }

  const ret = lookupCalleeReturnType(expr, ref, context);
  if (ret) return ret;

  // Direct construction — the callee itself names a class/struct.
  if (cppClassExists(expr, ref, context)) return cppLastSegment(expr);
  return null;
}

/**
 * Recover the type of an `auto`-declared local from its initializer on the
 * declaration line — `auto x = Foo::instance();`, `auto w = make_unique<W>();`,
 * `auto p = new W();`, `auto w = Widget();` (#645).
 *
 * @returns The inferred class name, or `null` when the line has no initializer
 *   for `receiverName` or its shape is not recognized.
 */
function inferCppAutoInitializerType(
  line: string,
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth: number,
): string | null {
  const escaped = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const m = line.match(new RegExp(`\\b${escaped}\\b\\s*=\\s*([^;]+)`));
  if (!m || !m[1]) return null;
  const init = m[1].trim();

  const neu = init.match(/^new\s+([A-Za-z_][\w:]*)/);
  if (neu && neu[1]) return cppLastSegment(neu[1]);

  // A call or construction: `Foo(...)`, `A::b(...)`, `make_unique<T>(...)`.
  const call = init.match(/^([A-Za-z_][\w:]*(?:\s*<[^>;]*>)?)\s*\(/);
  if (call && call[1]) {
    return resolveCppCallResultType(call[1].replace(/\s+/g, ''), ref, context, depth + 1);
  }
  return null;
}

/**
 * Split an extractor-encoded chained call `inner().method` into its receiver
 * expression and outer method name. Returns `null` for any other shape.
 */
function splitEncodedCallChain(referenceName: string): { inner: string; method: string } | null {
  const m = referenceName.match(/^(.+)\(\)\.(\w+)$/);
  if (!m || !m[1] || !m[2]) return null;
  return { inner: m[1], method: m[2] };
}

/**
 * Resolve a C++ chained call whose receiver is itself a call — encoded by the
 * extractor as `inner().method` (#645). The receiver's type is what
 * the inner call returns; the outer method is then resolved and VALIDATED on it
 * (resolveMethodOnType requires `cls::method` to exist), so a wrong inference
 * produces no edge rather than a wrong one.
 */
export function matchCppCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = splitEncodedCallChain(ref.referenceName);
  if (!chain) return null;
  const cls = resolveCppCallResultType(chain.inner, ref, context);
  if (!cls) return null;
  return resolveMethodOnType(cls, chain.method, ref, context, 0.85, 'instance-method');
}

/**
 * Resolve a PHP fluent static-factory chain whose receiver is a static call —
 * `Cls::for($x)->method()`, encoded by the extractor as `Cls::for().method`
 * (#608, the per-credential Laravel client idiom). The receiver's type is what
 * `Cls::for` returns: a `: self` / `: static` resolves to `Cls` itself, a
 * concrete `: Type` to that type. The outer method is then resolved and
 * VALIDATED on it (resolveMethodOnType requires the method to exist), so a
 * wrong inference yields no edge rather than a wrong one.
 */
export function matchPhpCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = splitEncodedCallChain(ref.referenceName);
  if (!chain) return null;
  const { inner, method } = chain;
  if (!inner.includes('::')) return null; // only static-factory (`Cls::method`) chains
  const factoryClass = inner.slice(0, inner.lastIndexOf('::'));
  const ret = lookupCalleeReturnType(inner, ref, context);
  if (!ret) return null;
  // `self` (the extractor's marker for self/static/$this) → the factory's class.
  const resolvedClass = ret === 'self' ? factoryClass : ret;
  return resolveMethodOnType(resolvedClass, method, ref, context, 0.85, 'instance-method');
}

/**
 * Resolve a dotted chained call whose receiver is a static factory / fluent call —
 * `Foo.getInstance().bar()`, encoded by the extractor as `Foo.getInstance().bar`
 * (#645/#608 mechanism). The receiver's type is what `Foo.getInstance` returns
 * (its declared return type); the outer method is then resolved and VALIDATED on
 * it (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
 * yields no edge rather than a wrong one (e.g. a same-named `bar()` on an
 * unrelated class is never matched). Shared by the dot-notation languages
 * (Java, Kotlin, C#) — same receiver shape, same `Class::method` qualified names.
 */
export function matchDottedCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = splitEncodedCallChain(ref.referenceName);
  if (!chain) return null;
  const { inner, method } = chain; // `Foo.getInstance`, `bar`
  const lastDot = inner.lastIndexOf('.');

  // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
  // capitalized inner is a class construction, so the receiver's type is the
  // class itself — resolve the method on it. Kotlin only: there an unprefixed
  // capitalized call constructs the class, whereas in Java a bare `Foo()` is a
  // method call (constructors need `new`), so we must not assume construction.
  // A lowercase bare inner is a top-level `factory().method()` whose type we
  // can't recover — bail.
  if (lastDot <= 0) {
    if (ref.language !== 'kotlin' || !/^[A-Z]/.test(inner)) return null;
    return resolveMethodOnType(
      inner, method, ref, context, 0.85, 'instance-method', importedFqnOf(inner, ref, context),
    );
  }

  // Factory/fluent receiver `Receiver.factory(args).method()`: the receiver's
  // type is what `Receiver.factory` returns (its declared return type).
  const factoryClass = inner.slice(0, lastDot).split('.').pop(); // simple class name
  const factoryMethod = inner.slice(lastDot + 1);
  if (!factoryClass || !factoryMethod) return null;
  const ret = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
  if (!ret) return null;
  return resolveMethodOnType(
    ret, method, ref, context, 0.85, 'instance-method', importedFqnOf(ret, ref, context),
  );
}

/**
 * When several classes share a simple type name, the caller file's import of
 * that type is the only signal that names WHICH one (#314). Returns the imported
 * FQN for `typeName` in the ref's file, or undefined.
 */
function importedFqnOf(
  typeName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | undefined {
  const imports = context.getImportMappings(ref.filePath, ref.language);
  return imports.find((i) => i.localName === typeName)?.source;
}

/**
 * Java/Kotlin: infer a receiver's declared type by walking field declarations
 * in the class enclosing the call site. The field's `signature` is already in
 * the form "<type> <name>" (set by tree-sitter.ts extractField), so we
 * pull the type from there. Handles Spring `@Resource UserBO userbo;` /
 * `@Autowired private UserService userService;` where the receiver field name
 * doesn't match the class name by Java naming convention.
 *
 * Returns the bare type name (generics stripped, dotted package stripped) or
 * null when no matching field is in the enclosing class.
 */
function inferJavaFieldReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const inFile = context.getNodesInFile(ref.filePath);
  if (inFile.length === 0) return null;

  // Find the class enclosing the call line (tightest match by latest start).
  let enclosing: Node | null = null;
  for (const n of inFile) {
    if (n.kind !== 'class' && n.kind !== 'interface') continue;
    if (n.language !== ref.language) continue;
    const end = n.endLine ?? n.startLine;
    if (n.startLine <= ref.line && end >= ref.line) {
      if (!enclosing || n.startLine >= enclosing.startLine) enclosing = n;
    }
  }
  if (!enclosing) return null;

  const enclosingStart = enclosing.startLine;
  const enclosingEnd = enclosing.endLine ?? enclosing.startLine;
  const field = inFile.find(
    (n) =>
      n.kind === 'field' &&
      n.name === receiverName &&
      n.language === ref.language &&
      n.startLine >= enclosingStart &&
      (n.endLine ?? n.startLine) <= enclosingEnd,
  );
  if (!field || !field.signature) return null;

  // Signature shape: "<type> <name>" (extractField). Pull the type,
  // strip generics + dotted package, drop array/varargs markers.
  const nameIndex = field.signature.lastIndexOf(field.name);
  // A signature that does not contain the field name has no recognizable type
  // part; `slice(0, -1)` would otherwise chop one character off the signature
  // and report that as the type.
  if (nameIndex < 0) return null;
  const typeRaw = field.signature.slice(0, nameIndex).trim();
  if (!typeRaw) return null;
  const typeNoGenerics = typeRaw.replace(/<[^>]*>/g, '').trim();
  const typeNoArray = typeNoGenerics.replace(/\[\s*\]/g, '').replace(/\.\.\.$/, '').trim();
  const parts = typeNoArray.split(/[.\s]+/).filter(Boolean);
  const lastPart = parts[parts.length - 1];
  if (!lastPart) return null;
  if (!/^[A-Z]/.test(lastPart)) return null; // primitives / lowercase → skip
  return lastPart;
}

/**
 * Find `methodName` on a class/struct/interface called `className` in the
 * reference's language: the first method node in the class's file whose name
 * matches and whose qualified name mentions the class.
 */
function findMethodOnNamedClass(
  className: string,
  methodName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): Node | null {
  for (const classNode of context.getNodesByName(className)) {
    const isTypeNode =
      classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface';
    if (!isTypeNode) continue;
    // Skip cross-language class matches
    if (classNode.language !== ref.language) continue;

    const methodNode = context.getNodesInFile(classNode.filePath).find(
      (n) =>
        n.kind === 'method' &&
        n.name === methodName &&
        n.qualifiedName.includes(classNode.name),
    );
    if (methodNode) return methodNode;
  }
  return null;
}

/**
 * Try to resolve by method name on a class/object.
 *
 * Accepts `receiver.method` (receiver may itself be dotted) and `Class::method`.
 * Tried in order: typed receiver inference (C++ declarations, Java/Kotlin
 * fields), the receiver as a class name, the capitalized receiver as a class
 * name, and finally a codebase-wide lookup of the method name scored by word
 * overlap between the receiver and the method's qualified name.
 *
 * @returns The resolved reference, or `null` when the reference has another
 *   shape or no strategy produces a match.
 */
export function matchMethodCall(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Parse method call patterns like "obj.method" or "Class::method". The method
  // part allows trailing `:` keywords so Objective-C selectors resolve
  // (`SDImageCache.storeImage:`, `obj.setX:y:`); colons never appear in other
  // languages' method refs, so this is a no-op for them.
  // The receiver allows dots (`builder.Services.AddCoreServices`) so a CHAINED
  // call resolves by its last segment — Strategy 3 below name-matches the method
  // (with its existing single-candidate / receiver-overlap guards). Without this
  // a multi-dot extension-method call (C# DI `builder.Services.AddCoreServices()`,
  // `Guard.Against.X()`) matched no pattern and never resolved.
  const dotMatch = ref.referenceName.match(/^([\w.]+)\.(\w+:?(?:\w+:)*)$/);
  const colonMatch = ref.referenceName.match(/^(\w+)::(\w+)$/);
  const match = dotMatch || colonMatch;

  if (!match) {
    return null;
  }

  const [, objectOrClass, methodName] = match;
  // Both groups are mandatory and non-empty in either pattern; this guard only
  // narrows the types.
  if (!objectOrClass || !methodName) return null;

  if (ref.language === 'cpp' && dotMatch) {
    const inferredType = inferCppReceiverType(objectOrClass, ref, context);
    if (inferredType) {
      const typedMatch = resolveMethodOnType(
        inferredType,
        methodName,
        ref,
        context,
        0.9,
        'instance-method',
      );
      if (typedMatch) {
        return typedMatch;
      }
    }
  }

  // Java/Kotlin: receiver may be a field whose name doesn't match the type by
  // Java naming convention (`userbo` → class `UserBO`, abbreviated). Look up
  // the field in the enclosing class to get its declared type, then resolve
  // the method on that type. Covers Spring `@Resource`/`@Autowired` field
  // injection where the field type is the concrete bean class.
  if ((ref.language === 'java' || ref.language === 'kotlin') && dotMatch) {
    const inferredType = inferJavaFieldReceiverType(objectOrClass, ref, context);
    if (inferredType) {
      // When two classes share the same simple name, the caller file's
      // import is the only signal that names WHICH one — pass the
      // imported FQN so resolveMethodOnType can disambiguate (#314).
      const typedMatch = resolveMethodOnType(
        inferredType,
        methodName,
        ref,
        context,
        0.9,
        'instance-method',
        importedFqnOf(inferredType, ref, context),
      );
      if (typedMatch) {
        return typedMatch;
      }
    }
  }

  // Strategy 1: Direct class name match (existing logic)
  const directMethod = findMethodOnNamedClass(objectOrClass, methodName, ref, context);
  if (directMethod) {
    return {
      original: ref,
      targetNodeId: directMethod.id,
      confidence: 0.85,
      resolvedBy: 'qualified-name',
    };
  }

  // Strategy 2: Instance variable receiver - try capitalized form to find class
  // e.g., "permissionEngine" → look for classes containing "PermissionEngine"
  const capitalizedReceiver = objectOrClass.charAt(0).toUpperCase() + objectOrClass.slice(1);
  if (capitalizedReceiver !== objectOrClass) {
    const fuzzyMethod = findMethodOnNamedClass(capitalizedReceiver, methodName, ref, context);
    if (fuzzyMethod) {
      return {
        original: ref,
        targetNodeId: fuzzyMethod.id,
        confidence: 0.8,
        resolvedBy: 'instance-method',
      };
    }
  }

  // Strategy 3: Find methods by name across the codebase, match by receiver
  // name similarity with the containing class. Handles abbreviated variable
  // names like permissionEngine → PermissionRuleEngine.
  const methods = context
    .getNodesByName(methodName)
    .filter((n) => n.kind === 'method' && n.name === methodName);

  // Filter to same-language candidates first
  const sameLanguageMethods = methods.filter(m => m.language === ref.language);
  const targetMethods = sameLanguageMethods.length > 0 ? sameLanguageMethods : methods;

  // If only one same-language method with this name exists, use it
  const [soleMethod] = targetMethods;
  if (targetMethods.length === 1 && soleMethod && soleMethod.language === ref.language) {
    return {
      original: ref,
      targetNodeId: soleMethod.id,
      confidence: 0.7,
      resolvedBy: 'instance-method',
    };
  }

  // Multiple methods: score by receiver name word overlap with class name
  if (targetMethods.length > 1) {
    const receiverWords = splitCamelCase(objectOrClass);
    let bestMatch: Node | undefined;
    let bestScore = 0;

    for (const method of targetMethods) {
      const classWords = splitCamelCase(method.qualifiedName);
      let score = receiverWords.filter(w =>
        classWords.some(cw => cw.toLowerCase() === w.toLowerCase())
      ).length;
      // Bonus for same language
      if (method.language === ref.language) score += 1;
      if (score > bestScore) {
        bestScore = score;
        bestMatch = method;
      }
    }

    // Require a score of at least 2 (overlap words plus the language bonus)
    // before trusting the match.
    if (bestMatch && bestScore >= 2) {
      return {
        original: ref,
        targetNodeId: bestMatch.id,
        confidence: 0.65,
        resolvedBy: 'instance-method',
      };
    }
  }

  return null;
}

/**
 * Split a camelCase or PascalCase string into words. Also splits on
 * whitespace, `.`, `_`, `:`, `/` and `\`; single-character words are dropped.
 */
function splitCamelCase(str: string): string[] {
  return str.replace(/([a-z])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .split(/[\s._:\/\\]+/)
    .filter(w => w.length > 1);
}

/**
 * Compute directory proximity between two file paths.
 * Returns a score based on the number of shared directory segments.
 * Higher score = closer in directory tree.
*/
function computePathProximity(filePath1: string, filePath2: string): number {
  // Keep directory segments only: the final component (the file name) is
  // dropped from each path before comparing.
  const dirsA = filePath1.split('/');
  dirsA.pop();
  const dirsB = filePath2.split('/');
  dirsB.pop();

  // Length of the common leading run of directory segments.
  const limit = Math.min(dirsA.length, dirsB.length);
  let common = 0;
  while (common < limit && dirsA[common] === dirsB[common]) {
    common += 1;
  }

  // 15 points per shared segment, capped at 80.
  return Math.min(common * 15, 80);
}

/**
 * Pick the highest-scoring candidate for a reference.
 *
 * Score components, summed per candidate:
 *   - same file as the reference: +100;
 *   - directory proximity: `computePathProximity` (0–80);
 *   - language: +50 when equal to the reference's, otherwise -80;
 *   - kind, depending on the reference kind:
 *       `calls`        → function/method +25;
 *       `instantiates` → class/struct/interface +25, so a same-named
 *                        function elsewhere does not outrank the class;
 *       `decorates`    → function/method +25, class/interface +15 (class
 *                        decorators and annotation interfaces still resolve);
 *   - exported: +10;
 *   - same file with a truthy start line: up to +20, shrinking by 0.1 per
 *     line of distance from the reference.
 *
 * The running best starts at -1 and is only replaced by a strictly greater
 * score, so ties keep the earlier candidate and a candidate scoring -1 or
 * lower is never selected.
 *
 * @param ref - The reference being resolved.
 * @param candidates - Nodes that matched by name.
 * @param _context - Unused.
 * @returns The best candidate, or `null` when none scores above -1.
 */
function findBestMatch(
  ref: UnresolvedRef,
  candidates: Node[],
  _context: ResolutionContext
): Node | null {
  // Bonus for how well the candidate's kind fits the kind of reference.
  const kindBonus = (candidate: Node): number => {
    const callable = candidate.kind === 'function' || candidate.kind === 'method';
    switch (ref.referenceKind) {
      case 'calls':
        return callable ? 25 : 0;
      case 'instantiates':
        return candidate.kind === 'class' ||
          candidate.kind === 'struct' ||
          candidate.kind === 'interface'
          ? 25
          : 0;
      case 'decorates':
        if (callable) return 25;
        return candidate.kind === 'class' || candidate.kind === 'interface' ? 15 : 0;
      default:
        return 0;
    }
  };

  let winner: Node | null = null;
  let winnerScore = -1;

  for (const candidate of candidates) {
    const inSameFile = candidate.filePath === ref.filePath;

    let score = inSameFile ? 100 : 0;
    score += computePathProximity(ref.filePath, candidate.filePath);
    score += candidate.language === ref.language ? 50 : -80;
    score += kindBonus(candidate);
    if (candidate.isExported) score += 10;

    // Within the reference's own file, nearer definitions score higher.
    if (inSameFile && candidate.startLine) {
      const distance = Math.abs(candidate.startLine - ref.line);
      score += Math.max(0, 20 - distance / 10);
    }

    if (score > winnerScore) {
      winnerScore = score;
      winner = candidate;
    }
  }

  return winner;
}

/**
 * Last-resort, case-insensitive name match.
 *
 * Looks the lower-cased reference name up in the context's lower-case name
 * index, keeps only function/method/class nodes, passes them through
 * `applyLanguageGate`, and prefers same-language nodes when any exist. A
 * result is produced only when exactly one node remains.
 *
 * @returns A `fuzzy` resolution with confidence 0.5 (same language) or 0.3
 *   (cross-language), or `null` when zero or several nodes remain.
 */
export function matchFuzzy(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Indexed lookup by lower-cased name rather than a scan over all nodes.
  const byLowerName = context.getNodesByLowerName(ref.referenceName.toLowerCase());

  const gated = applyLanguageGate(
    byLowerName.filter(
      (n) => n.kind === 'function' || n.kind === 'method' || n.kind === 'class'
    ),
    ref
  );

  const sameLanguage = gated.filter((n) => n.language === ref.language);
  const pool = sameLanguage.length > 0 ? sameLanguage : gated;

  // Ambiguous (or empty) results are not guessed at.
  if (pool.length !== 1) return null;

  const only = pool[0]!;
  return {
    original: ref,
    targetNodeId: only.id,
    confidence: only.language !== ref.language ? 0.3 : 0.5,
    resolvedBy: 'fuzzy',
  };
}

/**
 * Run every matching strategy in descending order of confidence and return
 * the first resolution found.
 *
 * @returns The first non-null strategy result, or `null` if all fail.
 */
export function matchReference(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const language = ref.language;

  // Each entry is evaluated lazily, so later strategies do not run once an
  // earlier one has produced a result.
  const strategies: Array<() => ResolvedRef | null> = [
    // 0. File path ("snippets/drawer-menu.liquid" → file node).
    () => matchByFilePath(ref, context),

    // 1. Qualified name (highest confidence).
    () => matchByQualifiedName(ref, context),

    // 1b. C/C++ chained call whose receiver is itself a call:
    //     `Foo::instance().bar()`, encoded by the extractor as
    //     `Foo::instance().bar` (#645). The receiver type comes from what the
    //     inner call returns; the method is then resolved on that type.
    () => (language === 'cpp' || language === 'c' ? matchCppCallChain(ref, context) : null),

    // 1c. PHP fluent static-factory chain: `Cls::for($x)->method()`, encoded
    //     as `Cls::for().method` (#608). As in 1b, the receiver type is the
    //     factory's `: self` / `: Type` return.
    () => (language === 'php' ? matchPhpCallChain(ref, context) : null),

    // 1d. Dotted static-factory / fluent chain (Java / Kotlin / C#):
    //     `Foo.getInstance().bar()`, encoded as `Foo.getInstance().bar`
    //     (#645/#608 mechanism). bar's class is taken from getInstance's
    //     declared return type and the method validated on it.
    () =>
      language === 'java' || language === 'kotlin' || language === 'csharp'
        ? matchDottedCallChain(ref, context)
        : null,

    // 2. Method call pattern.
    () => matchMethodCall(ref, context),

    // 3. Exact name.
    () => matchByExactName(ref, context),

    // 4. Fuzzy (lowest confidence).
    () => matchFuzzy(ref, context),
  ];

  for (const attempt of strategies) {
    const resolved = attempt();
    if (resolved) return resolved;
  }
  return null;
}