/** * Tree-sitter Parser Wrapper * * Handles parsing source code and extracting structural information. */ import { Node as SyntaxNode, Tree } from 'web-tree-sitter'; import * as path from 'path'; import { Language, Node, Edge, NodeKind, ExtractionResult, ExtractionError, UnresolvedReference, } from '../types'; import { getParser, detectLanguage, isLanguageSupported, isFileLevelOnlyLanguage } from './grammars'; import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring } from './tree-sitter-helpers'; import { FN_REF_SPECS, captureFnRefCandidates, type FnRefSpec, type FnRefCandidate } from './function-ref'; import { isGeneratedFile } from './generated-detection'; import type { LanguageExtractor, ExtractorContext } from './tree-sitter-types'; import { EXTRACTORS } from './languages'; import { stripCppTemplateArgs } from './languages/c-cpp'; import { LiquidExtractor } from './liquid-extractor'; import { RazorExtractor } from './razor-extractor'; import { SvelteExtractor } from './svelte-extractor'; import { AstroExtractor } from './astro-extractor'; import { DfmExtractor } from './dfm-extractor'; import { VueExtractor } from './vue-extractor'; import { MyBatisExtractor } from './mybatis-extractor'; import { getAllFrameworkResolvers, getApplicableFrameworks, } from '../resolution/frameworks'; // Re-export for backward compatibility export { generateNodeId } from './tree-sitter-helpers'; /** * RTK Query generated-hook naming convention: `use` + PascalCase endpoint (with * an optional `Lazy` variant prefix) + `Query`/`Mutation`. Matches the hook * bindings to extract from an `export const {...} = api` destructuring. Kept in * sync with the same convention in `callback-synthesizer.ts` (the synth side). */ const RTK_HOOK_NAME_RE = /^use[A-Z][A-Za-z0-9]*(?:Query|Mutation)$/; /** React HOC callees whose result is itself a component — a PascalCase const * initialized with one of these is a component, not a constant (#841). 
*/
const REACT_COMPONENT_HOCS = new Set(['forwardRef', 'memo', 'React.forwardRef', 'React.memo']);

/**
 * Vue store collections whose object-literal members are the symbols an agent
 * looks for. Their members are extracted as function nodes so
 * `actions`/`mutations`/`getters` are findable + readable (the foundation under
 * any later dispatch-bridge synth).
 */
const VUE_STORE_COLLECTION_NAMES = new Set(['actions', 'mutations', 'getters']);

/** Store-definition callees whose config object carries those collections. */
const VUE_STORE_FACTORY_CALLEES = new Set(['defineStore', 'createStore']);

/**
 * Distinct signals that a file is a Vuex/Pinia store (≥2 ⇒ treat a bare
 * `const actions = {…}` as a store collection — see looksLikeVueStoreFile).
 */
const VUE_STORE_FILE_SIGNAL =
  /\bdefineStore\b|\bcreateStore\b|\bVuex\b|\bmutations\b|\bactions\b|\bgetters\b|\bnamespaced\b/g;

/**
 * Extract the name of `node` for the extractor's language.
 *
 * The raw name comes from extractNameRaw. If the extractor provides a
 * `recoverMangledName` hook (C/C++ only), the raw name is passed through it to
 * recover a real identifier from a name still mangled by a macro the pre-parse
 * didn't blank. The hook is a no-op on well-formed names.
 */
function extractName(node: SyntaxNode, source: string, extractor: LanguageExtractor): string {
  const rawName = extractNameRaw(node, source, extractor);
  if (!extractor.recoverMangledName) return rawName;
  return extractor.recoverMangledName(rawName);
}

/**
 * Name resolution without the mangled-name recovery step. Tries, in order:
 *   1. the extractor's `resolveName` hook (a non-empty result wins),
 *   2. the node's name field (`extractor.nameField`), unwrapping declarators,
 *   3. the identifier inside a Dart `method_signature`'s inner signature,
 *   4. the first identifier-like named child.
 * Arrow/function expressions deliberately return '' instead of reaching step 4.
 */
function extractNameRaw(node: SyntaxNode, source: string, extractor: LanguageExtractor): string {
  const hooked = extractor.resolveName?.(node, source);
  if (hooked) return hooked;

  const declared = getChildByField(node, extractor.nameField);
  if (declared) {
    // Peel pointer_declarator / reference_declarator wrappers (C/C++ `int* f()`,
    // `int& f()`, `int&& f()`). Left wrapped, an inline reference-returning
    // method would be named "& f() const" instead of "f" — common in Unreal
    // Engine gameplay headers (`const FGameplayTagContainer& GetActiveTags() const`).
    // Out-of-line defs (`T& C::f()`) already resolve via the qualified-name hook.
    // A pointer_declarator exposes its inner node through a `declarator` field;
    // a reference_declarator has none, so it is reached via namedChild(0).
    let target = declared;
    while (target.type === 'pointer_declarator' || target.type === 'reference_declarator') {
      const unwrapped = getChildByField(target, 'declarator') || target.namedChild(0);
      if (!unwrapped) break;
      target = unwrapped;
    }

    switch (target.type) {
      case 'operator_cast': {
        // C++ user-defined conversion operator: the first child is the target
        // type, the second the `() const` tail. Name it `operator <type>` (the
        // conventional spelling) rather than the whole
        // `operator EALSMovementState() const` declarator, so it matches
        // symbolic overloads (`operator+`) and is findable by the type name.
        const castType = target.namedChild(0);
        return castType
          ? `operator ${getNodeText(castType, source).trim()}`
          : getNodeText(target, source);
      }
      case 'function_declarator':
      case 'declarator': {
        // Complex declarators (C/C++): the name is the inner declarator.
        const inner = getChildByField(target, 'declarator') || target.namedChild(0);
        return getNodeText(inner || target, source);
      }
      case 'dot_index_expression': {
        // Lua `function t.f()`: the simple name is the trailing field (the
        // table receiver is captured separately via getReceiverType).
        const field = getChildByField(target, 'field');
        if (field) return getNodeText(field, source);
        break;
      }
      case 'method_index_expression': {
        // Lua `function t:m()`: the simple name is the trailing method.
        const method = getChildByField(target, 'method');
        if (method) return getNodeText(method, source);
        break;
      }
      default:
        break;
    }
    return getNodeText(target, source);
  }

  // Dart method_signature: the identifier lives inside an inner signature node.
  if (node.type === 'method_signature') {
    const dartSignatureKinds = [
      'function_signature',
      'getter_signature',
      'setter_signature',
      'constructor_signature',
      'factory_constructor_signature',
    ];
    for (let i = 0; i < node.namedChildCount; i++) {
      const signature = node.namedChild(i);
      if (!signature || !dartSignatureKinds.includes(signature.type)) continue;
      for (let j = 0; j < signature.namedChildCount; j++) {
        const part = signature.namedChild(j);
        if (part?.type === 'identifier') return getNodeText(part, source);
      }
    }
  }

  // Arrow/function expressions are named by their parent variable_declarator,
  // not by identifiers in their body. Without this early exit a
  // single-expression arrow such as `const fn = () => someIdentifier` would be
  // named "someIdentifier" by the fallback below instead of "fn".
  if (node.type === 'arrow_function' || node.type === 'function_expression') {
    return '';
  }

  // Last resort: the first identifier-like named child.
  const identifierKinds = ['identifier', 'type_identifier', 'simple_identifier', 'constant'];
  for (let i = 0; i < node.namedChildCount; i++) {
    const candidate = node.namedChild(i);
    if (candidate && identifierKinds.includes(candidate.type)) {
      return getNodeText(candidate, source);
    }
  }
  return '';
}

/**
 * Resolve a Scala type node to its base type NAME for name-matching — unwrapping
 * `generic_type` (`Monoid[Int]` → `Monoid`), taking the last segment of a
 * qualified `stable_type_identifier` (`cats.Functor` → `Functor`), and falling
 * back to a descendant `type_identifier`. Returns null for non-type nodes.
* Shared by Scala inheritance and type-reference extraction.
 */
function scalaBaseTypeName(node: SyntaxNode | null, source: string): string | null {
  if (!node) return null;
  const kind = node.type;

  if (kind === 'type_identifier' || kind === 'identifier') {
    return getNodeText(node, source);
  }

  if (kind === 'generic_type') {
    // `Base[Args]` — the base type is the first named child.
    return scalaBaseTypeName(node.namedChild(0), source);
  }

  if (kind === 'stable_type_identifier' || kind === 'stable_identifier') {
    // Qualified `a.b.C` — match on the simple (last) segment, so scan the
    // named children from the end for the first identifier-like one.
    const segments = node.namedChildren;
    for (let i = segments.length - 1; i >= 0; i--) {
      const segment = segments[i];
      if (segment && (segment.type === 'type_identifier' || segment.type === 'identifier')) {
        return getNodeText(segment, source);
      }
    }
    return null;
  }

  // Any other node: use a direct `type_identifier` child when there is one.
  const nested = node.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
  return nested ? getNodeText(nested, source) : null;
}

/**
 * Resolve the declared identifier inside a C declarator. A `declaration`'s
 * `declarator` field nests the name through `init_declarator` (with value),
 * `pointer_declarator`/`array_declarator`/`parenthesized_declarator`
 * wrappers (each via their own `declarator` field) down to an `identifier`.
 * A `function_declarator` means the declaration is a function prototype (or a
 * function-pointer var) — return null so it isn't extracted as a variable.
 * Any other node kind also yields null, and the descent stops after 12 steps.
 */
function cDeclaratorIdentifier(node: SyntaxNode | null): SyntaxNode | null {
  let current: SyntaxNode | null = node;
  for (let hops = 0; current && hops < 12; hops++) {
    const kind = current.type;
    if (kind === 'identifier') return current;

    const isWrapper =
      kind === 'init_declarator' ||
      kind === 'pointer_declarator' ||
      kind === 'array_declarator' ||
      kind === 'parenthesized_declarator';
    // `function_declarator` and every unknown kind end the search.
    if (!isWrapper) return null;

    current = getChildByField(current, 'declarator');
  }
  return null;
}

/** First `simple_identifier` in `node`'s subtree (breadth-ish, first-found).
* Swift's property name nests as `property_declaration → pattern →
 * bound_identifier → simple_identifier`; this resolves it (and the bound name of
 * a Kotlin/Swift property declarator for the shadow prune). For a tuple pattern
 * (`let (a, b)`) it returns the first — acceptable, those are rare for consts.
 */
function firstSimpleIdentifier(node: SyntaxNode | null): SyntaxNode | null {
  // FIFO work list read through a cursor; at most 40 nodes are examined.
  const queue: SyntaxNode[] = node ? [node] : [];
  for (let head = 0; head < queue.length && head < 40; head++) {
    const current = queue[head];
    if (!current) break;
    if (current.type === 'simple_identifier') return current;
    for (let i = 0; i < current.namedChildCount; i++) {
      const child = current.namedChild(i);
      if (child) queue.push(child);
    }
  }
  return null;
}

/**
 * Swift property facts for a property declaration node.
 *
 * @returns `nameNode` — the first `simple_identifier` under the `name` field
 *   (or, failing that, under the first `value_binding_pattern`/`pattern`
 *   child); `isLet` — whether the `value_binding_pattern` text starts with
 *   `let`; `isComputed` — whether the node has a `computed_property` or
 *   `protocol_property_requirements` child (a getter block, no stored value —
 *   never a constant).
 */
function swiftPropertyInfo(
  node: SyntaxNode,
  source: string,
): { nameNode: SyntaxNode | null; isLet: boolean; isComputed: boolean } {
  const children = node.namedChildren;

  const namePattern =
    getChildByField(node, 'name') ??
    children.find((c) => c.type === 'value_binding_pattern' || c.type === 'pattern') ??
    null;

  const binding = children.find((c) => c.type === 'value_binding_pattern');
  let isLet = false;
  if (binding != null) {
    isLet = getNodeText(binding, source).trimStart().startsWith('let');
  }

  const isComputed = children.some(
    (c) => c.type === 'computed_property' || c.type === 'protocol_property_requirements',
  );

  return { nameNode: firstSimpleIdentifier(namePattern), isLet, isComputed };
}

/** True when `node` is (transitively) inside a C function body — i.e. a local,
 * not a file/namespace-scope declaration. Walks the parent chain to the root.
*/
function hasFunctionAncestor(node: SyntaxNode): boolean {
  // Stop at the first `function_definition` ancestor; reaching the root
  // (parent is null) means the node is not inside a function body.
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (ancestor.type === 'function_definition') return true;
  }
  return false;
}

/**
 * PHP type-position wrapper node kinds (a type-hint is `named_type`,
 * `?Foo` is `optional_type`, `A|B` is `union_type`, `A&B` is
 * `intersection_type`). Used to find the type subtree inside a parameter /
 * property / return position before walking it for class references.
 */
const PHP_TYPE_NODES: ReadonlySet<string> = new Set([
  'named_type',
  'optional_type',
  'nullable_type',
  'union_type',
  'intersection_type',
  'disjunctive_normal_form_type',
  'primitive_type',
]);

/**
 * Member-access node kinds whose receiver, when it's a capitalized
 * type/enum/class name, is a real dependency — `Enum.value`, `Type.CONST`,
 * `Foo::BAR`. These VALUE reads (as opposed to `Type.method()` calls, already
 * handled) produced no edge, so a type used only via a static member or enum
 * value looked like nothing depended on it. See {@link extractStaticMemberRef}.
 */
const MEMBER_ACCESS_TYPES: ReadonlySet<string> = new Set([
  'field_access', // java (`Foo.BAR`)
  'member_access_expression', // c# (`Foo.Bar`)
  'navigation_expression', // kotlin / swift (`Foo.bar`)
  'field_expression', // scala (`Foo.bar`)
  'class_constant_access_expression', // php (`Foo::CONST`, `Foo::class`)
  'scoped_property_access_expression', // php (`Foo::$bar`)
  'qualified_identifier', // c++ (`Foo::bar`)
]);

/**
 * Languages whose types are Capitalized by convention, so a capitalized
 * member-access receiver is reliably a type (not a local/variable). The
 * static-member/value-read pass is gated to these — the ones where it was the
 * confirmed residual frontier (enum-value / static-field reads).
* TS/JS/Python are deliberately excluded, and a measured A/B confirms the call:
 * extending the pass to them adds ZERO coverage — in import-based languages you
 * must `import` a type before any `Type.MEMBER` read, so the import edge already
 * covers it (the static read is pure duplication) — while adding real graph
 * noise (+1813 edges / +2448 `references` on excalidraw, the retrieval-perf
 * benchmark, all pointing at already-covered types). Don't re-add
 * `member_expression`/`attribute` here.
 */
const STATIC_MEMBER_LANGS: ReadonlySet<string> = new Set([
  'java',
  'csharp',
  'kotlin',
  'swift',
  'scala',
  'dart',
  'php',
  'cpp',
]);

/**
 * Tree-sitter node kinds that represent constructor invocations
 * (`new Foo()` and friends). Used by extractInstantiation to emit
 * an `instantiates` reference targeting the class name.
 */
const INSTANTIATION_KINDS: ReadonlySet<string> = new Set([
  'new_expression', // typescript / javascript / tsx / jsx
  'object_creation_expression', // java / c#
  'instance_creation_expression', // some grammars
  'composite_literal', // go — `Widget{...}` / `pkga.Widget{...}`
  'struct_expression', // rust — `Widget { n: 1 }` / `m::Widget { .. }`
  'instance_expression', // scala — `new Monoid[Int] { ... }`
]);

/**
 * TreeSitterExtractor - Main extraction class
 */
export class TreeSitterExtractor {
  private filePath: string;
  private language: Language;
  private source: string;
  private tree: Tree | null = null;
  private nodes: Node[] = [];
  private edges: Edge[] = [];
  private unresolvedReferences: UnresolvedReference[] = [];

  // Value-reference edges (default ON; set CODEGRAPH_VALUE_REFS=0 to disable; see flushValueRefs).
  // Same-file reads of file-scope const/var symbols → `references` edges so impact analysis catches
  // value consumers ("change this constant/table, affect its readers").
  private static readonly VALUE_REF_LANGS = new Set([
    'typescript',
    'javascript',
    'tsx',
    'go',
    'python',
    'rust',
    'ruby',
    'c',
    'java',
    'csharp',
    'php',
    'scala',
    'kotlin',
    'swift',
    'dart',
    'pascal',
  ]);
  private static readonly MAX_VALUE_REF_NODES = 20_000;
  private readonly valueRefsEnabled = process.env.CODEGRAPH_VALUE_REFS !== '0';
  // Target symbol name → node ID (written by captureValueRefScope).
  private fileScopeValues = new Map<string, string>();
  // file-scope nodes per name (conditional-def detection)
  private fileScopeValueCounts = new Map<string, number>();
  private valueRefScopes: Array<{ id: string; node: SyntaxNode; name: string }> = [];

  private errors: ExtractionError[] = [];
  private extractor: LanguageExtractor | null = null;
  private nodeStack: string[] = []; // Stack of parent node IDs
  // lookup key → node ID for Pascal defProc lookup
  private methodIndex: Map<string, string> | null = null;

  // Function-as-value capture (#756): per-language spec + candidates collected
  // during the walk, gated & flushed into unresolvedReferences at end-of-file
  // (see flushFnRefCandidates). Each candidate carries the ID of the node that
  // was on top of nodeStack when it was captured.
  private fnRefSpec: FnRefSpec | undefined;
  private fnRefCandidates: Array<FnRefCandidate & { fromNodeId: string }> = [];

  // Memoized "is this a Vue store file" verdict (per-extractor = per-file).
private vueStoreFile: boolean | null = null;

  /**
   * @param filePath Path of the file being extracted; also used to build node IDs.
   * @param source   Full source text of the file.
   * @param language Optional explicit language; when omitted (or falsy) it is
   *                 detected from the path and source.
   */
  constructor(filePath: string, source: string, language?: Language) {
    const resolvedLanguage = language || detectLanguage(filePath, source);
    this.filePath = filePath;
    this.source = source;
    this.language = resolvedLanguage;
    this.extractor = EXTRACTORS[resolvedLanguage] || null;
    this.fnRefSpec = FN_REF_SPECS[resolvedLanguage];
  }

  /**
   * Parse the source and extract nodes, edges and unresolved references.
   *
   * Returns an error-only result when the language is unsupported or no parser
   * is available. Parse failures are recorded in `errors`, except WASM memory
   * errors, which are re-thrown. The tree and the source string are released
   * before returning, whatever the outcome.
   */
  extract(): ExtractionResult {
    const startTime = Date.now();

    // Both early exits share one result shape; only message and code differ.
    const bail = (message: string, code: ExtractionError['code']): ExtractionResult => ({
      nodes: [],
      edges: [],
      unresolvedReferences: [],
      errors: [{ message, filePath: this.filePath, severity: 'error', code }],
      durationMs: Date.now() - startTime,
    });

    if (!isLanguageSupported(this.language)) {
      return bail(`Unsupported language: ${this.language}`, 'unsupported_language');
    }

    const parser = getParser(this.language);
    if (!parser) {
      return bail(`Failed to get parser for language: ${this.language}`, 'parser_error');
    }

    try {
      // Optional pre-parse source transform (offset-preserving) to work around
      // grammar gaps — e.g. C# blanks conditional-compilation directive lines
      // the grammar mis-parses inside enum bodies (#237). this.source is
      // reassigned so downstream getNodeText reads the same bytes the parser
      // saw (identical outside the blanked directive lines).
      if (this.extractor?.preParse) {
        this.source = this.extractor.preParse(this.source, this.filePath);
      }

      const tree = parser.parse(this.source) ?? null;
      this.tree = tree;
      if (!tree) {
        throw new Error('Parser returned null tree');
      }

      // File node representing the source file itself.
      const fileNodeId = `file:${this.filePath}`;
      this.nodes.push({
        id: fileNodeId,
        kind: 'file',
        name: path.basename(this.filePath),
        qualifiedName: this.filePath,
        filePath: this.filePath,
        language: this.language,
        startLine: 1,
        endLine: this.source.split('\n').length,
        startColumn: 0,
        endColumn: 0,
        isExported: false,
        updatedAt: Date.now(),
      });

      // The file node sits at the bottom of the stack so top-level
      // declarations get contains edges.
      this.nodeStack.push(fileNodeId);

      // File-level package declaration (Kotlin/Java). Creates an implicit
      // `namespace` node wrapping every top-level declaration so their
      // qualifiedName carries the FQN — required for cross-file import
      // resolution on JVM languages where filename ≠ class name.
      const packageNodeId = this.extractFilePackage(tree.rootNode);
      if (packageNodeId) this.nodeStack.push(packageNodeId);

      this.visitNode(tree.rootNode);

      // Gate + flush function-as-value candidates (#756) while the file's
      // nodes and import refs are complete and the file node is still pushed.
      this.flushFnRefCandidates();
      this.flushValueRefs();

      if (packageNodeId) this.nodeStack.pop();
      this.nodeStack.pop();
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);

      // WASM memory errors leave the module in a corrupted state — all
      // subsequent parses would also fail. Re-throw so the worker can detect
      // and crash, forcing a clean restart with a fresh heap.
      const fatalWasmError = ['memory access out of bounds', 'out of memory'].some((needle) =>
        msg.includes(needle),
      );
      if (fatalWasmError) {
        throw error;
      }

      this.errors.push({
        message: `Parse error: ${msg}`,
        filePath: this.filePath,
        severity: 'error',
        code: 'parse_error',
      });
    } finally {
      // Free tree-sitter WASM memory immediately — trees hold native heap
      // memory invisible to V8's GC that accumulates across thousands of files.
      this.tree?.delete();
      this.tree = null;
      // Release source string to reduce GC pressure
      this.source = '';
    }

    return {
      nodes: this.nodes,
      edges: this.edges,
      unresolvedReferences: this.unresolvedReferences,
      errors: this.errors,
      durationMs: Date.now() - startTime,
    };
  }

  /**
   * Function-as-value capture (#756): if this node is one of the language's
   * value-position containers (call arguments, assignment RHS, struct/object
   * initializer, array/table literal), collect candidate function names from
   * it, attributed to the node currently on top of the node stack. Candidates
   * are gated & flushed at end-of-file (flushFnRefCandidates).
   */
  private maybeCaptureFnRefs(node: SyntaxNode, nodeType: string): void {
    const spec = this.fnRefSpec;
    const rule = spec?.dispatch.get(nodeType);
    if (!spec || !rule) return;

    // An empty stack yields undefined here, which is rejected just below.
    const fromNodeId = this.nodeStack[this.nodeStack.length - 1];
    if (!fromNodeId) return;

    for (const cand of captureFnRefCandidates(node, rule, spec, this.source)) {
      this.fnRefCandidates.push({ ...cand, fromNodeId });
    }
  }

  /**
   * Candidates-only scan of a subtree the main walkers won't traverse
   * (top-level variable initializers). No extraction side effects. Halts at
   * nested function definitions: their bodies are walked — and their
   * candidates attributed — by extractFunction's own body walk. Recursion
   * stops below depth 12.
   */
  private scanFnRefSubtree(node: SyntaxNode, depth: number): void {
    if (!this.fnRefSpec || depth > 12) return;

    const nodeType = node.type;
    if (depth > 0) {
      const startsNestedFunction =
        this.extractor?.functionTypes.includes(nodeType) ||
        nodeType === 'arrow_function' ||
        nodeType === 'function_expression' ||
        nodeType === 'lambda_literal' ||
        nodeType === 'lambda_expression';
      if (startsNestedFunction) return;
    }

    this.maybeCaptureFnRefs(node, nodeType);

    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (child) this.scanFnRefSubtree(child, depth + 1);
    }
  }

  /**
   * Gate captured function-as-value candidates and push survivors as
   * `function_ref` unresolved references.
*
   * The gate bounds volume and protects precision: a candidate survives only
   * if its name matches a function/method DEFINED IN THIS FILE or a name this
   * file imports/references. Everything else (locals, params, fields passed
   * as arguments) is dropped before it ever reaches the database. Resolution
   * then matches survivors against function/method nodes only
   * (matchFunctionRef) and emits `references` edges — which callers/impact
   * already traverse.
   *
   * Known v1 limit, deliberate: a C/C++ callback registered in a DIFFERENT
   * translation unit than its definition (extern, no symbol imports to match)
   * is not captured. Same-file registration — the dominant C pattern (static
   * callback + same-file ops struct) — is.
   */
  private flushFnRefCandidates(): void {
    const pending = this.fnRefCandidates;
    if (pending.length === 0) return;
    this.fnRefCandidates = [];

    // Generated/minified files (vendored jquery.min.js and friends): their
    // function-as-value edges are noise — single-letter minified symbols
    // resolve everywhere. Same policy as the callback synthesizer.
    if (isGeneratedFile(this.filePath)) return;

    // Names of the functions/methods this file defines.
    const definedHere = new Set(
      this.nodes
        .filter((n) => n.kind === 'function' || n.kind === 'method')
        .map((n) => n.name),
    );

    // Import-binding names only (all binding emitters push kind 'imports').
    // Deliberately NOT 'references': those carry type-annotation and
    // interface-member names, which let local variables that share a type
    // member's name slip through the gate (excalidraw A/B finding).
    const SIMPLE_NAME = /^[A-Za-z_$][A-Za-z0-9_$]*$/;
    // JVM imports are dotted (`com.example.OtherClass`); PHP `use` imports
    // are backslashed (`App\Services\Mailer`). Both contribute their last
    // segment — the simple name code uses to reference them (e.g. in
    // Java/Kotlin `OtherClass::method` references).
    const QUALIFIED_IMPORT = /^[A-Za-z_$][A-Za-z0-9_$.\\]*[.\\]([A-Za-z_$][A-Za-z0-9_$]*)$/;
    const importedNames = new Set<string>();
    for (const ref of this.unresolvedReferences) {
      if (ref.referenceKind !== 'imports') continue;
      const importName = ref.referenceName;
      if (SIMPLE_NAME.test(importName)) {
        importedNames.add(importName);
        continue;
      }
      const lastSegment = QUALIFIED_IMPORT.exec(importName)?.[1];
      if (lastSegment) importedNames.add(lastSegment);
    }

    const ungated = this.fnRefSpec?.ungatedModes;
    const addressOfOnly = this.fnRefSpec?.addressOfOnly === true;
    const emitted = new Set<string>();

    for (const cand of pending) {
      const atFileScope = cand.fromNodeId.startsWith('file:');

      // C++ (addressOfOnly): a BARE identifier qualifies only inside a
      // file-scope initializer table. Everywhere else — args, assignments,
      // local braced-init lists like `{begin, size}` — only explicit `&`
      // forms count (fmt A/B finding: generic names `begin`/`out`/`size`
      // collide with locals and members).
      const bareInFileScopeTable = atFileScope && (cand.mode === 'value' || cand.mode === 'list');
      if (addressOfOnly && !cand.explicitRef && !bareInFileScopeTable) continue;

      // Gate policy by candidate shape:
      //  - `this.<member>`: ALWAYS flush — the member may be inherited from a
      //    class in another file (definedHere can't see it), volume is
      //    naturally bounded by real `this.X` expressions, and resolution is
      //    strictly class-scoped (own members or the validated supertype
      //    pass), so nothing fuzzy can leak.
      //  - `Scope::member` (C++ member-pointers, Java/Kotlin type-qualified
      //    method refs, PHP `'Cls::m'`): ALWAYS flush — the explicit-ref
      //    syntax is self-selecting, the referenced type often needs NO
      //    import (Java/Kotlin same-package, Kotlin companions), and
      //    resolution is scope-suffix-anchored + unique-or-drop, so a
      //    same-named member on another class can't match.
      //  - C-family file-scope initializers skip the gate entirely
      //    (constant-expression context — see FnRefSpec.ungatedModes).
      //  - everything else: name ∈ same-file functions/methods ∪ imports.
      const selfSelecting = cand.name.startsWith('this.') || cand.name.includes('::');
      if (!selfSelecting) {
        const ungatedHere = ungated?.has(cand.mode) === true && atFileScope;
        // PHP HOF-position string callables (see FnRefCandidate.skipGate)
        const skipGate = ungatedHere || cand.skipGate === true;
        const knownName = definedHere.has(cand.name) || importedNames.has(cand.name);
        if (!skipGate && !knownName) continue;
      }

      // Emit at most one reference per (source node, name) pair.
      const key = `${cand.fromNodeId}|${cand.name}`;
      if (emitted.has(key)) continue;
      emitted.add(key);

      this.unresolvedReferences.push({
        fromNodeId: cand.fromNodeId,
        referenceName: cand.name,
        referenceKind: 'function_ref',
        line: cand.line,
        column: cand.column,
      });
    }
  }

  /**
   * Record value-reference bookkeeping as nodes are created: const/var symbols
   * with distinctive names (length ≥ 3 and containing an uppercase letter or
   * underscore) whose parent is a file/class/module/struct/enum node become
   * reference targets; function/method/const/var symbols become reader scopes
   * whose bodies flushValueRefs scans.
   */
  private captureValueRefScope(kind: NodeKind, name: string, id: string, node: SyntaxNode): void {
    const isValueKind = kind === 'constant' || kind === 'variable';

    // Pascal targets `constant` only: its extractor emits function PARAMETERS
    // (`Dest: TBufferWriter`) and class fields (`declField`) as `variable` at the
    // enclosing scope, which would otherwise become noisy targets (a param name
    // shared across many procs collapses to one file-wide target). Genuine
    // Pascal shared values are `const` (`constant`), so restrict to that. (Unit
    // `var` globals are the rare cost; the parameter/field noise dominates.)
    const targetKindOk = this.language === 'pascal' ? kind === 'constant' : isValueKind;
    const distinctiveName = name.length >= 3 && /[A-Z_]/.test(name);

    if (targetKindOk && distinctiveName) {
      const parentId = this.nodeStack[this.nodeStack.length - 1];
      // file-scope OR class/module/struct/enum-scope constants are targets.
      // Class/module scope matters for languages (Ruby) that keep nearly all
      // constants inside a class or module; struct/enum scope matters for Swift,
      // which namespaces shared constants in `struct`/`enum` (`enum Constants {
      // static let X }`). Readers are same-file methods of that type.
      const targetScopePrefixes = ['file:', 'class:', 'module:', 'struct:', 'enum:'];
      if (parentId && targetScopePrefixes.some((prefix) => parentId.startsWith(prefix))) {
        this.fileScopeValues.set(name, id);
        // How many target nodes carry this name. A conditional def
        // (`try: X = a; except: X = b`) makes >1 — distinct from a local shadow,
        // which adds a binding the prune must catch (see flushValueRefs).
        const seenSoFar = this.fileScopeValueCounts.get(name) ?? 0;
        this.fileScopeValueCounts.set(name, seenSoFar + 1);
      }
    }

    if (isValueKind || kind === 'function' || kind === 'method') {
      this.valueRefScopes.push({ id, node, name });
    }
  }

  /**
   * Emit same-file `references` edges from a symbol to the file-scope const/var it reads (TS/JS).
   * The engine doesn't edge const→consumer, so impact analysis misses "change this table, affect
   * its readers" (the ReScript-PR false positive). Same-file only (resolution is unambiguous),
   * distinctive target names only (dodges the local-shadowing precision trap documented on
   * function_ref), deduped per (reader, target). Default on (CODEGRAPH_VALUE_REFS=0 disables) +
   * additive. Shadowed targets are pruned — see below.
   */
  private flushValueRefs(): void {
    const scopes = this.valueRefScopes;
    const targets = this.fileScopeValues;
    const fileScopeCounts = this.fileScopeValueCounts;
    this.valueRefScopes = [];
    this.fileScopeValues = new Map();
    this.fileScopeValueCounts = new Map();
    if (!this.valueRefsEnabled || !TreeSitterExtractor.VALUE_REF_LANGS.has(this.language)) return;
    if (targets.size === 0 || scopes.length === 0 || isGeneratedFile(this.filePath)) return;

    // Prune SHADOWED targets. A target re-bound in an INNER scope (a
    // bundled/Emscripten `const Module` re-declared as a nested `var Module`; a
    // Go package `const Timeout` shadowed by a local `Timeout := …`; a Python
    // module `CONFIG` shadowed by a local `CONFIG = …`) resolves to the inner
    // binding for nested readers, so a file-scope edge is a false positive.
// Inner re-bindings aren't graph nodes, so detect them at the syntax level: // count every declarator of the name across the tree and compare against how // many FILE-SCOPE nodes carry it. A real shadow makes (declarators > // file-scope nodes) — the excess is the local binding. A conditional // module-level def (`try: X = a; except: X = b`) makes them EQUAL (both // declarators are file-scope nodes), so it's correctly kept. Complements the // path-based isGeneratedFile() check, which can't catch content-minified // bundles. // // Declarator node types are per-grammar; a file only contains its own // language's nodes, so matching all of them in one switch is safe. if (this.tree) { const declCounts = new Map(); const bump = (nameNode: SyntaxNode | null) => { // `simple_identifier` is Kotlin's name node (a property declarator's name). if (nameNode && (nameNode.type === 'identifier' || nameNode.type === 'simple_identifier')) { const nm = getNodeText(nameNode, this.source); if (targets.has(nm)) declCounts.set(nm, (declCounts.get(nm) ?? 0) + 1); } }; const dstack: SyntaxNode[] = [this.tree.rootNode]; let dvisited = 0; while (dstack.length > 0 && dvisited < TreeSitterExtractor.MAX_VALUE_REF_NODES) { const n = dstack.pop()!; dvisited++; switch (n.type) { case 'variable_declarator': // TS/JS/tsx case 'const_spec': // Go `const X = …` case 'var_spec': // Go `var X = …` bump(n.namedChild(0)); break; case 'const_item': // Rust `const X: T = …` case 'static_item': // Rust `static X: T = …` bump(getChildByField(n, 'name')); break; case 'let_declaration': // Rust `let x = …` (locals — the shadow source) case 'short_var_declaration': // Go `x, Y := …` case 'assignment': { // Python `X = …` / `X: T = …` / `A, B = …` const left = getChildByField(n, 'left') ?? getChildByField(n, 'pattern') ?? 
n.namedChild(0); if (left?.type === 'identifier') bump(left); else if (left) for (const c of left.namedChildren) bump(c); break; } case 'init_declarator': // C `T X = …` (file-scope const AND the local that shadows it) bump(cDeclaratorIdentifier(n)); break; case 'val_definition': // Scala `val X = …` (object/top-level const AND a method-local that shadows it) case 'var_definition': { // Scala `var X = …` const pat = getChildByField(n, 'pattern'); if (pat?.type === 'identifier') bump(pat); break; } case 'static_final_declaration': // Dart top-level/`static` `const`/`final` (the target itself) case 'initialized_identifier': // Dart instance field / `var` case 'initialized_variable_definition': { // Dart a method-local `const`/`final`/`var` that shadows a const const id = n.namedChildren.find((c) => c.type === 'identifier'); if (id) bump(id); break; } case 'declConst': // Pascal unit/class `const` (the target itself) AND a function-local `const` that shadows it case 'declVar': { // Pascal a function-local `var` that shadows a const bump(getChildByField(n, 'name')); break; } case 'property_declaration': { // Kotlin / Swift `val`/`let X = …` (object/static const AND a method-local that shadows it) // Kotlin: variable_declaration → simple_identifier; Swift: a `pattern` // (`` field) → simple_identifier. Resolve either shape. const vd = n.namedChildren.find((c) => c.type === 'variable_declaration'); const id = vd ? vd.namedChildren.find((c) => c.type === 'simple_identifier') : firstSimpleIdentifier( getChildByField(n, 'name') ?? n.namedChildren.find((c) => c.type === 'value_binding_pattern' || c.type === 'pattern') ?? null, ); if (id) bump(id); break; } } for (let i = 0; i < n.namedChildCount; i++) { const c = n.namedChild(i); if (c) dstack.push(c); } } for (const [nm, c] of declCounts) if (c > (fileScopeCounts.get(nm) ?? 
1)) targets.delete(nm); if (targets.size === 0) return; } for (const scope of scopes) { const seen = new Set(); const stack: SyntaxNode[] = [scope.node]; // Dart and Pascal attach a function/method BODY as a *next sibling* of the // signature node that is stored as the reader scope (Dart `method_signature` // ← `function_body`; Pascal `declProc` ← `block`, both under a `defProc`), // not as a child — so the scope subtree is just the signature and the reads // live in the sibling. Pull it in. (A body as a next sibling of the scope // node is unique to Dart/Pascal among the value-ref languages — every other // grammar nests the body inside the function node — so this is inert // elsewhere.) const sib = scope.node.nextNamedSibling; if (sib && (sib.type === 'function_body' || sib.type === 'block')) stack.push(sib); let visited = 0; while (stack.length > 0 && visited < TreeSitterExtractor.MAX_VALUE_REF_NODES) { const n = stack.pop()!; visited++; // `constant` covers Ruby, where both a constant's definition and its // references are `constant`-typed nodes, not `identifier`. `name` covers // PHP, where a constant reference — bare `MAX_ITEMS` or the const half of // `self::MAX_ITEMS` / `Foo::MAX_ITEMS` — is a `name` node (a `$var` local // is a `variable_name`, a different namespace, so it can never shadow a // bare constant — no prune wiring needed). `simple_identifier` covers // Kotlin, whose every name reference (a const read included) is that // node type. Safe across languages: a file only holds its own grammar's // nodes; `name` is PHP-only and `simple_identifier` is Kotlin-only here. if ( n.type === 'identifier' || n.type === 'constant' || n.type === 'name' || n.type === 'simple_identifier' ) { const refName = getNodeText(n, this.source); const targetId = targets.get(refName); // Skip self and same-name targets: a symbol referencing a file-scope // sibling of its own name (the two halves of a conditional `try: X=…; // except: X=…`) is never a meaningful value read. 
if (targetId && targetId !== scope.id && refName !== scope.name && !seen.has(targetId)) { seen.add(targetId); this.edges.push({ source: scope.id, target: targetId, kind: 'references', metadata: { valueRef: true }, }); } } for (let i = 0; i < n.namedChildCount; i++) { const c = n.namedChild(i); if (c) stack.push(c); } } } } /** * Visit a node and extract information */ private visitNode(node: SyntaxNode): void { if (!this.extractor) return; const nodeType = node.type; let skipChildren = false; // Language-specific custom visitor hook if (this.extractor.visitNode) { const ctx = this.makeExtractorContext(); const handled = this.extractor.visitNode(node, ctx); if (handled) { // The hook consumed this subtree, so the walkers below never descend // into it — scan it for function-as-value candidates (#756). Scala's // hook handles val/var definitions (`val table = Seq(targetCb)`), for // example. The scan is capture-only and halts at nested functions. this.scanFnRefSubtree(node, 0); return; } } // Pascal-specific AST handling if (this.language === 'pascal') { skipChildren = this.visitPascalNode(node); if (skipChildren) return; } // Function-as-value capture (#756) — independent of the dispatch ladder // below (the captured container types have no other handler there), so it // can never shadow or be shadowed by an extraction branch. 
this.maybeCaptureFnRefs(node, nodeType);

    // Dispatch ladder: a single if / else-if chain, so the FIRST matching branch
    // wins and branch order is significant. A branch sets skipChildren when its
    // extractor already walked (or deliberately consumed) the node's subtree.

    // Check for function declarations
    // For Python/Ruby, function_definition inside a class should be treated as method
    if (this.extractor.functionTypes.includes(nodeType)) {
      if (this.isInsideClassLikeNode() && this.extractor.methodTypes.includes(nodeType)) {
        // Inside a class - treat as method
        this.extractMethod(node);
        skipChildren = true; // extractMethod visits children via visitFunctionBody
      } else {
        this.extractFunction(node);
        skipChildren = true; // extractFunction visits children via visitFunctionBody
      }
    }
    // Check for class declarations
    else if (this.extractor.classTypes.includes(nodeType)) {
      // Some languages reuse class_declaration for structs/enums (e.g. Swift)
      const classification = this.extractor.classifyClassNode?.(node) ?? 'class';
      if (classification === 'struct') {
        this.extractStruct(node);
      } else if (classification === 'enum') {
        this.extractEnum(node);
      } else if (classification === 'interface') {
        this.extractInterface(node);
      } else if (classification === 'trait') {
        this.extractClass(node, 'trait');
      } else {
        this.extractClass(node);
      }
      skipChildren = true; // extractClass visits body children
    }
    // Extra class node types (e.g. Dart mixin_declaration, extension_declaration)
    else if (this.extractor.extraClassNodeTypes?.includes(nodeType)) {
      this.extractClass(node);
      skipChildren = true;
    }
    // Check for method declarations (only if not already handled by functionTypes)
    else if (this.extractor.methodTypes.includes(nodeType)) {
      // TS/JS class fields parse as a methodTypes node; only function-valued
      // fields are methods — a plain field (`public fonts: Fonts;`) is a
      // property (#808). classifyMethodNode is absent for other languages.
      if (this.extractor.classifyMethodNode?.(node) === 'property') {
        const propNode = this.extractProperty(node);
        // Walk the initializer so its calls/instantiations attribute to the
        // property (`history = createHistory()` → history calls
        // createHistory). The old field-as-method path never walked these
        // (resolveBody only resolves function bodies), so this is additive.
        const valueNode = getChildByField(node, 'value');
        if (propNode && valueNode) {
          this.nodeStack.push(propNode.id);
          this.visitFunctionBody(valueNode, '');
          this.nodeStack.pop();
        }
        // A field initializer can also register callbacks
        // (`static handlers = { click: onClick }`) — scan it for
        // function-as-value candidates (capture-only, halts at functions).
        this.scanFnRefSubtree(node, 0);
        skipChildren = true;
      } else {
        this.extractMethod(node);
        skipChildren = true; // extractMethod visits children via visitFunctionBody
      }
    }
    // Check for interface/protocol/trait declarations
    else if (this.extractor.interfaceTypes.includes(nodeType)) {
      this.extractInterface(node);
      skipChildren = true; // extractInterface visits body children
    }
    // Check for struct declarations
    else if (this.extractor.structTypes.includes(nodeType)) {
      this.extractStruct(node);
      skipChildren = true; // extractStruct visits body children
    }
    // Check for enum declarations
    else if (this.extractor.enumTypes.includes(nodeType)) {
      this.extractEnum(node);
      skipChildren = true; // extractEnum visits body children
    }
    // Check for type alias declarations (e.g. `type X = ...` in TypeScript)
    // For Go, type_spec wraps struct/interface definitions — resolveTypeAliasKind
    // detects these and extractTypeAlias creates the correct node kind.
    // extractTypeAlias's return value decides whether children are skipped.
    else if (this.extractor.typeAliasTypes.includes(nodeType)) {
      skipChildren = this.extractTypeAlias(node);
    }
    // Check for class properties (e.g. C# property_declaration)
    else if (this.extractor.propertyTypes?.includes(nodeType) && this.isInsideClassLikeNode()) {
      this.extractProperty(node);
      // Property initializers aren't walked — scan for function-as-value
      // candidates (#756): Scala `val table = Seq(targetCb)` in an object,
      // Kotlin `val cb = ::handler` class properties.
      this.scanFnRefSubtree(node, 0);
      skipChildren = true;
    }
    // Check for class fields (e.g. Java field_declaration, C# field_declaration)
    else if (this.extractor.fieldTypes?.includes(nodeType) && this.isInsideClassLikeNode()) {
      this.extractField(node);
      // Field initializers aren't walked — scan for function-as-value
      // candidates (#756): Java `List<Runnable> table = List.of(Main::cb)`,
      // C# `List<Action<int>> table = new() { TargetCb }`.
      this.scanFnRefSubtree(node, 0);
      skipChildren = true;
    }
    // Check for variable declarations (const, let, var, etc.)
    // Only extract top-level variables (not inside functions/methods) — plus
    // class/module-scope CONSTANTS, which Ruby (and other const-in-class
    // languages) keep almost exclusively inside a class/module. A Ruby `CONST =
    // …` has a `constant`-typed LHS; other languages don't put one here, so this
    // is effectively Ruby-only and doesn't disturb their class-internal locals.
    else if (
      this.extractor.variableTypes.includes(nodeType) &&
      (!this.isInsideClassLikeNode() || this.isClassScopeConstantAssignment(node))
    ) {
      this.extractVariable(node);
      // extractVariable doesn't walk every initializer shape (object literals
      // are deliberately skipped; Python/Ruby don't walk at all), so scan the
      // declaration subtree for function-as-value candidates — `const routes =
      // { home: renderHome }`, `handlers = {"recv": target_cb}`. The scan halts
      // at nested function definitions (their bodies are walked — and
      // attributed — separately) and flush-time dedup absorbs any overlap with
      // initializers extractVariable DOES walk.
      this.scanFnRefSubtree(node, 0);
      skipChildren = true; // extractVariable handles children
    }
    // Swift properties inside a type. A stored instance property becomes a `field`
    // node; a `static let`/`static var` member becomes `constant`/`variable`
    // (Swift's `static`-namespacing idiom — value-reference edges can then target
    // it); a COMPUTED property (getter block, no stored value) becomes a `property`
    // node whose getter is walked below so its calls attribute to it. A property's
    // PROPERTY WRAPPER (`@Argument`/`@Published`/`@State`/custom) and declared type
    // are dependencies attributed to the enclosing type. (Other languages extract
    // properties via property/field types.)
    else if (
      this.language === 'swift' &&
      (nodeType === 'property_declaration' || nodeType === 'protocol_property_declaration') &&
      this.isInsideClassLikeNode()
    ) {
      // The enclosing type: top of the node stack at the time the property is visited.
      const ownerId = this.nodeStack[this.nodeStack.length - 1];
      const { nameNode, isLet, isComputed } = swiftPropertyInfo(node, this.source);
      let computedPropId: string | undefined;
      if (nameNode) {
        if (isComputed) {
          // Computed property — accessed like a property but its getter holds real
          // logic. Index as `property` so search/explore find it (#1020: computed
          // props such as a heavily-read `var isCloudProxy: Bool` returned "No
          // results found"); pushed below so the getter's calls attribute to it
          // rather than flattening onto the owning type (SwiftUI `var body: some
          // View { … }` — the whole subview tree — is the canonical case).
          const prop = this.createNode('property', getNodeText(nameNode, this.source), node, {
            visibility: this.extractor.getVisibility?.(node),
            isStatic: this.extractor.isStatic?.(node) ?? false,
          });
          computedPropId = prop?.id;
        } else {
          // A `static let`/`static var` member is a SHARED constant of the type
          // (esp. in `enum`/`struct`); an instance stored property stays a `field`
          // (per-instance — Swift instance properties otherwise aren't own nodes).
          const isStatic = this.extractor.isStatic?.(node) ?? false;
          this.createNode(isStatic ? (isLet ? 'constant' : 'variable') : 'field', getNodeText(nameNode, this.source), node, {
            visibility: this.extractor.getVisibility?.(node),
            isStatic,
          });
        }
      }
      if (ownerId) {
        this.extractDecoratorsFor(node, ownerId);
        this.extractVariableTypeAnnotation(node, ownerId);
        // Fluent / SwiftUI property-wrapper attributes often reference a model or
        // type by metatype in their ARGUMENTS — `@Siblings(through: Pivot.self,
        // …)`, `@Group(…)`. extractDecoratorsFor captures the wrapper type
        // (`Siblings`); this pulls the TYPE out of the argument expressions
        // (`Pivot.self` → a dependency on Pivot), so a model reached ONLY through
        // a relationship (a many-to-many pivot/join model) isn't left orphaned.
        // extractStaticMemberRef self-filters to `Type.member` navigation, so the
        // `\.$keypath` arguments and the wrapper `user_type` are skipped.
        const modifiers = node.namedChildren.find((c: SyntaxNode) => c.type === 'modifiers');
        if (modifiers) {
          // Recursive pre-order walk over every named descendant of `modifiers`.
          const walkAttrArgs = (n: SyntaxNode): void => {
            this.extractStaticMemberRef(n);
            for (let i = 0; i < n.namedChildCount; i++) {
              const c = n.namedChild(i);
              if (c) walkAttrArgs(c);
            }
          };
          walkAttrArgs(modifiers);
        }
      }
      // A computed property's getter holds real logic — walk it with the property
      // node pushed so its calls/instantiations attribute to the property (a
      // SwiftUI `body`'s subview tree becomes the property's callees). skipChildren
      // then stops the generic walker from re-walking the getter (and the
      // modifiers/type annotation already handled above).
      if (computedPropId) {
        const getter = node.namedChildren.find(
          (c: SyntaxNode) => c.type === 'computed_property' || c.type === 'protocol_property_requirements',
        );
        if (getter) {
          this.nodeStack.push(computedPropId);
          this.visitFunctionBody(getter, '');
          this.nodeStack.pop();
        }
        skipChildren = true;
      }
    }
    // `export_statement` itself is not extracted — the walker descends
    // into children, where the inner declaration (lexical_declaration,
    // function_declaration, class_declaration, etc.) is dispatched to
    // its own extractor. `isExported` walks the parent chain, so the
    // exported flag is preserved automatically.
    //
    // Calling extractExportedVariables here AND descending caused every
    // `export const X = ...` to produce two nodes for the same symbol —
    // one kind:'variable' from extractExportedVariables and one
    // kind:'constant' from extractVariable. The dedicated dispatch is
    // the correct one (it picks kind from isConst, captures the
    // initializer signature, and walks type annotations); the
    // export-statement helper was redundant.

    // Check for imports
    else if (this.extractor.importTypes.includes(nodeType)) {
      this.extractImport(node);
    }
    // Re-export from another module — `export { X } from './y'` (TS/JS). A
    // re-export is a dependency on the source module just like an import, but
    // the export_statement is otherwise only descended into (no declaration to
    // extract), so a barrel that ONLY re-exports produced zero edges and showed
    // 0 dependents. Link each re-exported name to its definition. Children are
    // still visited (a non-re-export `export const X = …` has no `source` and
    // falls through to its normal declaration extraction).
    else if (
      nodeType === 'export_statement' &&
      (this.language === 'typescript' || this.language === 'tsx' || this.language === 'javascript' || this.language === 'jsx') &&
      getChildByField(node, 'source')
    ) {
      const parentId = this.nodeStack[this.nodeStack.length - 1];
      if (parentId) this.emitReExportRefs(node, parentId);
    }
    // Vuex MODULE default export — `export default { namespaced, actions: {…},
    // mutations: {…} }` (the canonical Vuex module shape). Object-literal methods
    // aren't otherwise extracted, so scan the config's actions/mutations/getters
    // collections and extract their methods as nodes. Store-file gated (the
    // ≥2-signal heuristic) so a plain default-exported object is untouched; skip
    // the subtree afterward (the collection methods are now handled).
    // Only reached for export statements WITHOUT a `source` field — those with
    // one are taken by the re-export branch above.
    else if (
      nodeType === 'export_statement' &&
      (this.language === 'typescript' || this.language === 'tsx' || this.language === 'javascript' || this.language === 'jsx') &&
      this.looksLikeVueStoreFile()
    ) {
      const exported = getChildByField(node, 'value');
      if (exported && (exported.type === 'object' || exported.type === 'object_expression')) {
        this.extractStoreCollectionMethods(exported);
        skipChildren = true;
      }
    }
    // Check for function calls
    else if (this.extractor.callTypes.includes(nodeType)) {
      this.extractCall(node);
    }
    // `new Foo(...)` / `Foo::new(...)` / object_creation_expression —
    // produce an `instantiates` reference. Children still walked so
    // nested calls inside the constructor args (`new Foo(bar())`) get
    // their own `calls` refs.
    else if (INSTANTIATION_KINDS.has(nodeType)) {
      this.extractInstantiation(node);
      // Java/C# `new T(...) { ... }` — anonymous class with body. Without
      // extracting it as a class node + its methods, the interface→impl
      // synthesizer (Phase 5.5) can't bridge T's abstract methods to the
      // anonymous overrides, and an agent investigating a call through T
      // (`strategy.iterator(...)` where strategy is a Strategy lambda body)
      // has to Read the file to find the actual implementation.
      const anonBody = this.findAnonymousClassBody(node);
      if (anonBody) {
        this.extractAnonymousClass(node, anonBody);
        skipChildren = true;
      }
    }
    // (Decorator handling lives inside the symbol-creating extractors
    // — extractClass / extractFunction / extractProperty — because the
    // decorator node sits BEFORE the symbol in the AST and the walker
    // would otherwise see the wrong nodeStack head.)

    // Rust: `impl Trait for Type { ... }` — creates implements edge from Type to Trait
    else if (nodeType === 'impl_item') {
      this.extractRustImplItem(node);
    }
    // TypeScript interface members: property_signature (`foo: T`, `foo?: T`)
    // and method_signature (`foo(arg: A): R`) both carry type annotations the
    // interface walker would otherwise drop.
// Extract them as `references`
    // edges from the interface so resolvers can wire callers/impact for
    // types that only appear in interface members.
    else if (
      (nodeType === 'property_signature' || nodeType === 'method_signature') &&
      this.isInsideClassLikeNode() &&
      this.TYPE_ANNOTATION_LANGUAGES.has(this.language)
    ) {
      const parentId = this.nodeStack[this.nodeStack.length - 1];
      if (parentId) {
        this.extractTypeAnnotations(node, parentId);
      }
      // don't skipChildren — nested signatures still need traversal
    }

    // Visit children (unless the extract method already visited them)
    if (!skipChildren) {
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (child) {
          this.visitNode(child);
        }
      }
    }
  }

  /**
   * Create a Node object, register it, and link it to the current parent.
   *
   * Pushes the node onto `this.nodes`, adds a `contains` edge from the id on top
   * of the node stack (when there is one), and — when value refs are enabled —
   * records value-reference bookkeeping for it.
   *
   * @param kind  Node kind to record.
   * @param name  Symbol name; an empty name produces no node.
   * @param node  Syntax node supplying the source range.
   * @param extra Optional fields spread AFTER the computed defaults, so they
   *              override them.
   * @returns The created node, or null when `name` is empty.
   */
  private createNode(
    kind: NodeKind,
    name: string,
    node: SyntaxNode,
    extra?: Partial<Node>
  ): Node | null {
    // Skip nodes with empty/missing names — they are not meaningful symbols
    // and would cause FK violations when edges reference them (see issue #42)
    if (!name) {
      return null;
    }
    const id = generateNodeId(this.filePath, kind, name, node.startPosition.row + 1);
    // Some grammars (e.g. Dart) model a function/method body as a *sibling* of
    // the signature node, so the declaration node's own range is just the
    // signature line. Extend endLine to the resolved body when it sits beyond
    // the node so the node spans its body — required for any body-level analysis
    // (callees, the callback synthesizer's body scan, context slices). Guarded to
    // only ever extend: for child-body grammars the body is within range (no-op).
    let endLine = node.endPosition.row + 1;
    // endColumn must describe the same position as endLine: when the range is
    // extended to a sibling body, take the body's end column as well.
    let endColumn = node.endPosition.column;
    if (kind === 'function' || kind === 'method') {
      const body = this.extractor?.resolveBody?.(node, this.extractor.bodyField);
      if (body && body.endPosition.row + 1 > endLine) {
        endLine = body.endPosition.row + 1;
        endColumn = body.endPosition.column;
      }
    }
    const newNode: Node = {
      id,
      kind,
      name,
      qualifiedName: this.buildQualifiedName(name),
      filePath: this.filePath,
      language: this.language,
      startLine: node.startPosition.row + 1,
      endLine,
      startColumn: node.startPosition.column,
      endColumn,
      updatedAt: Date.now(),
      ...extra,
    };
    // Persist extra symbol-level modifiers (e.g. Kotlin `expect`/`actual`) onto
    // the node's decorators list so the resolver can pair multiplatform
    // declarations with their implementations. Merged, not overwritten, so a
    // language that also captures real annotations keeps both.
    const mods = this.extractor?.extractModifiers?.(node);
    if (mods && mods.length > 0) {
      newNode.decorators = [...(newNode.decorators ?? []), ...mods];
    }
    this.nodes.push(newNode);
    // Add containment edge from parent
    if (this.nodeStack.length > 0) {
      const parentId = this.nodeStack[this.nodeStack.length - 1];
      if (parentId) {
        this.edges.push({
          source: parentId,
          target: id,
          kind: 'contains',
        });
      }
    }
    if (this.valueRefsEnabled) this.captureValueRefScope(kind, name, id, node);
    return newNode;
  }

  /**
   * Find first named child whose type is in the given list.
   * Used to locate inner type nodes (e.g. enum_specifier inside a typedef).
   *
   * @param node  Parent whose direct named children are searched, in order.
   * @param types Accepted node types; the list is only read, never mutated.
   * @returns The first matching child, or null when none matches.
   */
  private findChildByTypes(node: SyntaxNode, types: readonly string[]): SyntaxNode | null {
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (child && types.includes(child.type)) return child;
    }
    return null;
  }

  /**
   * Find a `packageTypes` child under the root, create a `namespace` node
   * for it, and return its id so the caller can scope top-level
   * declarations underneath. Returns null when no package header is
   * present (script files, .kts without a package).
*/
  private extractFilePackage(rootNode: SyntaxNode): string | null {
    const packageTypes = this.extractor?.packageTypes;
    if (!packageTypes || packageTypes.length === 0 || !this.extractor?.extractPackage) return null;

    // First direct named child of the root whose type is a package type.
    let pkgNode: SyntaxNode | null = null;
    let index = 0;
    while (pkgNode === null && index < rootNode.namedChildCount) {
      const candidate = rootNode.namedChild(index);
      index += 1;
      if (candidate && packageTypes.includes(candidate.type)) pkgNode = candidate;
    }
    if (pkgNode === null) return null;

    const pkgName = this.extractor.extractPackage(pkgNode, this.source);
    if (!pkgName) return null;

    // createNode can still decline (null); map that to "no package scope".
    const namespaceNode = this.createNode('namespace', pkgName, pkgNode);
    return namespaceNode?.id ?? null;
  }

  /**
   * Join the names of the enclosing symbols on the node stack with `name`,
   * using `::` as the separator.
   */
  private buildQualifiedName(name: string): string {
    // Semantic hierarchy only: file nodes are left out. The path already lives
    // in filePath, and repeating it here would pollute FTS.
    const segments: string[] = [];
    this.nodeStack.forEach((stackId) => {
      const enclosing = this.nodes.find((candidate) => candidate.id === stackId);
      if (enclosing && enclosing.kind !== 'file') {
        segments.push(enclosing.name);
      }
    });
    segments.push(name);
    return segments.join('::');
  }

  /**
   * Assemble the ExtractorContext handed to language-specific visitNode hooks:
   * thin forwards to this extractor's methods plus live getters over its state.
   */
  private makeExtractorContext(): ExtractorContext {
    // Accessors declared in an object literal bind `this` to that literal, so
    // the extractor is reached through an alias instead.
    // eslint-disable-next-line @typescript-eslint/no-this-alias
    const owner = this;
    return {
      createNode: (kind, name, node, extra) => owner.createNode(kind, name, node, extra),
      visitNode: (node) => owner.visitNode(node),
      visitFunctionBody: (body, functionId) => owner.visitFunctionBody(body, functionId),
      addUnresolvedReference: (ref) => owner.unresolvedReferences.push(ref),
      pushScope: (nodeId) => owner.nodeStack.push(nodeId),
      popScope: () => owner.nodeStack.pop(),
      get filePath() {
        return owner.filePath;
      },
      get source() {
        return owner.source;
      },
      get nodeStack() {
        return owner.nodeStack;
      },
      get nodes() {
        return owner.nodes;
      },
    };
  }

  /**
   * Check if the current node stack indicates we are inside a class-like node
   * (class, struct, interface, trait, enum, module).
File nodes do not count as class-like.
   */
  private isInsideClassLikeNode(): boolean {
    // An empty stack yields undefined here, which takes the same exit.
    const parentId = this.nodeStack[this.nodeStack.length - 1];
    if (!parentId) return false;

    const parentKind = this.nodes.find((candidate) => candidate.id === parentId)?.kind;
    switch (parentKind) {
      case 'class':
      case 'struct':
      case 'interface':
      case 'trait':
      case 'enum':
      case 'module':
        return true;
      default:
        // Unknown parent id, or a parent of any other kind (e.g. a file).
        return false;
    }
  }

  /**
   * True for an `assignment` node whose left-hand side is a `constant` node —
   * the Ruby `CONST = …` shape, worth extracting as a symbol even inside a
   * class or module. Other languages don't give an assignment a
   * `constant`-typed LHS, so in practice only Ruby passes this gate.
   */
  private isClassScopeConstantAssignment(node: SyntaxNode): boolean {
    const lhs =
      node.type === 'assignment' ? (getChildByField(node, 'left') ?? node.namedChild(0)) : null;
    return lhs?.type === 'constant';
  }

  /**
   * Extract a function: create its `function` node, record type annotations
   * and decorators, then walk the body with the new node on the scope stack.
   *
   * @param node         Syntax node of the function.
   * @param nameOverride Name resolved by the caller for an otherwise anonymous
   *                     function (e.g. arrow values of exported-const object
   *                     members — SvelteKit actions).
   */
  private extractFunction(node: SyntaxNode, nameOverride?: string): void {
    const lang = this.extractor;
    if (!lang) return;

    // A function with a receiver (e.g. a Rust function_item inside an impl
    // block) is a method — hand it to the method extractor.
    if (lang.getReceiverType?.(node, this.source)) {
      this.extractMethod(node);
      return;
    }

    // Locate the body (language hook first, then the plain body field) and walk
    // it, attributing what is found to `ownerId`.
    const walkBody = (ownerId: string): void => {
      const body = lang.resolveBody?.(node, lang.bodyField) ?? getChildByField(node, lang.bodyField);
      if (body) {
        this.visitFunctionBody(body, ownerId);
      }
    };

    // Inline-object arrows reached by the general walker carry no override, so
    // they can still end up nameless and take the anonymous path below.
    let name = nameOverride ?? extractName(node, this.source, lang);

    // An arrow function / function expression has no `name` field of its own;
    // when it initializes a variable (`export const useAuth = () => { ... }`)
    // the name lives on the parent variable_declarator.
    const isFunctionValue = node.type === 'arrow_function' || node.type === 'function_expression';
    if (!nameOverride && name === '' && isFunctionValue) {
      const declarator = node.parent;
      const declaredName =
        declarator?.type === 'variable_declarator' ? getChildByField(declarator, 'name') : null;
      if (declaredName) {
        name = getNodeText(declaredName, this.source);
      }
    }

    if (name === '') {
      // No node for the anonymous wrapper itself, but its body is still walked:
      // AMD/RequireJS and CommonJS module wrappers (`define([], function(){…})`,
      // `(function(){…})()`) hold named inner functions and calls that would
      // otherwise be lost — the dispatcher set skipChildren, so nothing else
      // descends into this subtree. (#528)
      walkBody('');
      return;
    }

    // Misparse artifacts (e.g. C++ macros causing "namespace detail" functions):
    // drop the node, keep the body's calls and structural nodes.
    if (lang.isMisparsedFunction?.(name, node)) {
      walkBody('');
      return;
    }

    const docstring = getPrecedingDocstring(node, this.source);
    const signature = lang.getSignature?.(node, this.source);
    const visibility = lang.getVisibility?.(node);
    const isExported = lang.isExported?.(node, this.source);
    const isAsync = lang.isAsync?.(node);
    const isStatic = lang.isStatic?.(node);
    const returnType = lang.getReturnType?.(node, this.source);
    const funcNode = this.createNode('function', name, node, {
      docstring,
      signature,
      visibility,
      isExported,
      isAsync,
      isStatic,
      returnType,
    });
    if (!funcNode) return;

    // Parameter types and return type.
    this.extractTypeAnnotations(node, funcNode.id);
    // Decorators on the function itself (rare in JS/TS; Python
    // `@decorator def f():`, Java/Kotlin annotations on free functions).
    this.extractDecoratorsFor(node, funcNode.id);

    // Walk the body with this function as the enclosing scope.
    this.nodeStack.push(funcNode.id);
    walkBody(funcNode.id);
    this.nodeStack.pop();
  }

  /**
   * Detect a React component declared via an HOC wrapper whose result is itself a
   * component: `forwardRef(...)`, `memo(...)`, `React.forwardRef/memo(...)`, and
   * styled-components / emotion `styled.tag\`…\`` / `styled(Base)\`…\``. These
   * initializers are a call / tagged-template (not a bare arrow), so the const is
   * otherwise classified `constant` — and a constant is skipped by both the
   * JSX-render edge synthesizer and component resolution, so `