/**
 * Callback / observer edge synthesis — Phase 1 + 2.
 *
 * Closes dynamic-dispatch holes where a dispatcher invokes callbacks registered
 * elsewhere. Two channel shapes:
 *
 *  (1) Field-backed observer (Phase 1):
 *        onUpdate(cb) { this.callbacks.add(cb); }              // registrar
 *        triggerUpdate() { for (cb of this.callbacks) cb(); }  // dispatcher
 *        scene.onUpdate(this.triggerRender)                    // registration
 *        → synthesize triggerUpdate → triggerRender
 *
 *  (2) String-keyed EventEmitter (Phase 2):
 *        this.on('mount', function onmount(){...})             // registration
 *        fn.emit('mount', this)                                // dispatch
 *        → synthesize (method containing emit('mount')) → onmount
 *
 * Whole-graph pass after base resolution. High-precision/low-recall by design:
 * named callbacks only; field channels paired by file+field; EventEmitter
 * channels capped by event fan-out (generic names like 'error' skipped — they
 * need receiver-type matching, deferred to Phase 3). All synthesized edges are
 * tagged `provenance:'heuristic'`. See docs/design/callback-edge-synthesis.md.
 *
 * Later sections of this file add further bridges built the same way
 * (React/Flutter re-render, C++ and JVM override dispatch, Go gRPC stubs,
 * JSX child rendering).
 */

import type { Edge, Node } from '../types';
import type { QueryBuilder } from '../db/queries';
import type { ResolutionContext } from './types';
import { isGeneratedFile } from '../extraction/generated-detection';

// Whole-name match for methods that register a callback (Phase 1 registrars).
const REGISTRAR_NAME = /^(on[A-Z]\w*|subscribe|addListener|addEventListener|register|watch|listen|addCallback)$/;
// Case-insensitive substring match for methods that fan out to callbacks (Phase 1 dispatchers).
const DISPATCHER_NAME = /(emit|trigger|notify|dispatch|fire|publish|flush)/i;
// Upper bound on edges synthesized per channel / per class pairing.
const MAX_CALLBACKS_PER_CHANNEL = 40;
const EVENT_FANOUT_CAP = 6; // skip events with more handlers/dispatchers than this (too generic without type info)
// Registration call: group 1 = event name, group 2 = named function expression,
// group 3 = bare or `this.`-qualified identifier. Global flag: callers reset `lastIndex`.
const ON_RE = /\.(?:on|once|addListener)\(\s*['"]([^'"]+)['"]\s*,\s*(?:function\s+(\w+)|(?:this\.)?(\w+))/g;
// Dispatch call: group 1 = event name. Global flag: callers reset `lastIndex`.
const EMIT_RE = /\.(?:emit|fire|dispatchEvent)\(\s*['"]([^'"]+)['"]/g;
// React class components: `this.setState(`.
const SETSTATE_RE = /this\.setState\s*\(/;
const FLUTTER_SETSTATE_RE = /\bsetState\s*\(/; // Flutter: setState((){…}) / this.setState
// Capitalized JSX tag; group 1 = component name.
const JSX_TAG_RE = /<([A-Z][A-Za-z0-9_]*)[\s/>]/g;
// Upper bound on JSX child edges synthesized per parent.
const MAX_JSX_CHILDREN = 30;
// Vue SFC templates: kebab-case child components (<el-button> → ElButton) and
// event bindings (@click="fn" / v-on:click="fn"). PascalCase children (<ElButton>)
// are already caught by JSX_TAG_RE via the SFC component node.
const VUE_KEBAB_RE = /<([a-z][a-z0-9]*(?:-[a-z0-9]+)+)[\s/>]/g;
const VUE_HANDLER_RE = /(?:@|v-on:)([a-zA-Z][\w-]*)(?:\.[\w]+)*\s*=\s*"([^"]+)"/g;
// Composable/hook destructure: `const { close: closeSidebar } = useSidebarControl()`.
// Captures the destructure body + the called composable; only `use*` calls qualify.
const VUE_DESTRUCTURE_RE = /(?:const|let|var)\s*\{([^}]+)\}\s*=\s*(\w+)\s*\(/g;

/** Converts a kebab-case tag name to PascalCase (`el-button` → `ElButton`). */
function kebabToPascal(s: string): string {
  let pascal = '';
  for (const segment of s.split('-')) {
    pascal += segment.charAt(0).toUpperCase() + segment.slice(1);
  }
  return pascal;
}

/**
 * Returns the text of the 1-based, inclusive line range `startLine..endLine`
 * of `content`, or `null` when either bound is missing or zero.
 */
function sliceLines(content: string, startLine?: number, endLine?: number): string | null {
  if (startLine && endLine) {
    const allLines = content.split('\n');
    return allLines.slice(startLine - 1, endLine).join('\n');
  }
  return null;
}

/** Name of the first `this.<field>` that `src` calls `.add(` / `.push(` / `.set(` on, else `null`. */
function registrarField(src: string): string | null {
  const hit = /this\.(\w+)\.(?:add|push|set)\(/.exec(src);
  if (hit === null) return null;
  return hit[1]!;
}

/**
 * Name of the `this.<field>` that `src` iterates: a `for … of this.<field>`
 * loop (optionally wrapped in `Array.from(`) when the source also contains a
 * call expression, otherwise a `this.<field>.forEach(` call. `null` if neither.
 */
function dispatcherField(src: string): string | null {
  const loopHit = /\bof\s+(?:Array\.from\(\s*)?this\.(\w+)/.exec(src);
  if (loopHit !== null && /\b\w+\s*\(/.test(src)) return loopHit[1]!;
  const forEachHit = /this\.(\w+)\.forEach\(/.exec(src);
  return forEachHit !== null ? forEachHit[1]! : null;
}

const FN_KINDS = new Set(['method', 'function', 'component']);

/**
 * Finds the function-like node (kind in FN_KINDS) whose line range contains
 * `line`. Among several containing nodes the one that starts latest wins; on a
 * start-line tie the later entry in `nodesInFile` wins. A node without
 * `endLine` is treated as a single-line range.
 */
function enclosingFn(nodesInFile: Node[], line: number): Node | null {
  let innermost: Node | null = null;
  for (const node of nodesInFile) {
    if (!FN_KINDS.has(node.kind)) continue;
    const lastLine = node.endLine ?? node.startLine;
    const containsLine = node.startLine <= line && lastLine >= line;
    if (!containsLine) continue;
    if (innermost === null || node.startLine >= innermost.startLine) {
      innermost = node;
    }
  }
  return innermost;
}

/** Phase 1: field-backed observer channels (registrar/dispatcher share a store).
*/
// Pairs registrar and dispatcher methods that touch the same `this.<field>` in
// the same file, then — for every call site of the registrar whose first
// argument is a named method/function — emits dispatcher → callback edges.
// Returns the synthesized edges; nothing is written to the graph here.
function fieldChannelEdges(queries: QueryBuilder, ctx: ResolutionContext): Edge[] {
  const candidates = [...queries.getNodesByKind('method'), ...queries.getNodesByKind('function')];
  const registrars: Array<{ node: Node; field: string }> = [];
  const dispatchers: Array<{ node: Node; field: string }> = [];

  // Classify by name first (cheap), then confirm by looking at the body.
  for (const m of candidates) {
    const isReg = REGISTRAR_NAME.test(m.name);
    const isDisp = DISPATCHER_NAME.test(m.name);
    if (!isReg && !isDisp) continue;
    const content = ctx.readFile(m.filePath);
    const src = content && sliceLines(content, m.startLine, m.endLine);
    if (!src) continue;
    if (isReg) {
      const f = registrarField(src);
      if (f) registrars.push({ node: m, field: f });
    }
    if (isDisp) {
      const f = dispatcherField(src);
      if (f) dispatchers.push({ node: m, field: f });
    }
  }

  const edges: Edge[] = [];
  const seen = new Set<string>(); // `${dispatcherId}>${callbackId}` pairs already emitted
  for (const reg of registrars) {
    // A channel is the (file, field) pair shared by registrar and dispatcher.
    const chDispatchers = dispatchers.filter(
      (d) => d.node.filePath === reg.node.filePath && d.field === reg.field
    );
    if (chDispatchers.length === 0) continue;

    // First argument of the registrar call, optionally `this.`-qualified.
    const argRe = new RegExp(`${reg.node.name}\\s*\\(\\s*(?:this\\.)?(\\w+)`);
    let added = 0;
    for (const e of queries.getIncomingEdges(reg.node.id, ['calls'])) {
      if (added >= MAX_CALLBACKS_PER_CHANNEL) break;
      if (!e.line) continue;
      const caller = queries.getNodeById(e.source);
      if (!caller) continue;
      const line = ctx.readFile(caller.filePath)?.split('\n')[e.line - 1];
      const am = line?.match(argRe);
      if (!am) continue;
      const fn = ctx.getNodesByName(am[1]!).find((n) => n.kind === 'method' || n.kind === 'function');
      if (!fn) continue;
      for (const disp of chDispatchers) {
        if (disp.node.id === fn.id) continue; // no self-edges
        const key = `${disp.node.id}>${fn.id}`;
        if (seen.has(key)) continue;
        seen.add(key);
        edges.push({
          source: disp.node.id,
          target: fn.id,
          kind: 'calls',
          line: disp.node.startLine,
          provenance: 'heuristic',
          metadata: {
            synthesizedBy: 'callback',
            via: reg.node.name,
            field: reg.field,
            // Where the callback was wired up (`scene.onUpdate(this.triggerRender)`).
            // This is the #1 thing an agent reads/greps to explain the flow — surface
            // it so node/trace/context can show it without a callers() + Read round-trip.
            registeredAt: `${caller.filePath}:${e.line}`,
          },
        });
        added++;
      }
    }
  }
  return edges;
}

/**
 * Phase 2: string-keyed EventEmitter channels (on('e', fn) ↔ emit('e')).
 *
 * Scans every file for dispatch calls (EMIT_RE) and registration calls (ON_RE),
 * groups both by event name, and links each function enclosing a dispatch to
 * each named handler registered for the same event. Events whose dispatcher or
 * handler count exceeds EVENT_FANOUT_CAP are skipped.
 *
 * @param ctx Resolution context used to enumerate files, read them and look up nodes.
 * @returns The synthesized `calls` edges, tagged `provenance: 'heuristic'`.
 */
function eventEmitterEdges(ctx: ResolutionContext): Edge[] {
  const emitsByEvent = new Map<string, Set<string>>(); // event → dispatcher node ids
  const handlersByEvent = new Map<string, Map<string, string>>(); // event → handler id → registration site (file:line)

  for (const file of ctx.getAllFiles()) {
    const content = ctx.readFile(file);
    if (!content) continue;
    // Cheap substring pre-filter before running the regexes.
    const hasEmit = content.includes('.emit(') || content.includes('.fire(') || content.includes('.dispatchEvent(');
    const hasOn = content.includes('.on(') || content.includes('.once(') || content.includes('.addListener(');
    if (!hasEmit && !hasOn) continue;

    const nodesInFile = ctx.getNodesInFile(file);
    // 1-based line number of a character offset.
    const lineOf = (idx: number) => content.slice(0, idx).split('\n').length;

    if (hasEmit) {
      EMIT_RE.lastIndex = 0; // shared global regex: always restart from the top
      let m: RegExpExecArray | null;
      while ((m = EMIT_RE.exec(content))) {
        const disp = enclosingFn(nodesInFile, lineOf(m.index));
        if (!disp) continue;
        const set = emitsByEvent.get(m[1]!) ?? new Set<string>();
        set.add(disp.id);
        emitsByEvent.set(m[1]!, set);
      }
    }

    if (hasOn) {
      ON_RE.lastIndex = 0; // shared global regex: always restart from the top
      let m: RegExpExecArray | null;
      while ((m = ON_RE.exec(content))) {
        const handlerName = m[2] || m[3];
        if (!handlerName) continue;
        const handler = ctx.getNodesByName(handlerName).find((n) => n.kind === 'function' || n.kind === 'method');
        if (!handler) continue;
        const map = handlersByEvent.get(m[1]!) ?? new Map<string, string>();
        map.set(handler.id, `${file}:${lineOf(m.index)}`);
        handlersByEvent.set(m[1]!, map);
      }
    }
  }

  const edges: Edge[] = [];
  const seen = new Set<string>();
  for (const [event, dispatchers] of emitsByEvent) {
    const handlers = handlersByEvent.get(event);
    if (!handlers) continue;
    // Precision guard: a generic event name with many handlers/dispatchers can't
    // be matched without receiver-type info (Phase 3) — skip rather than over-link.
    if (dispatchers.size > EVENT_FANOUT_CAP || handlers.size > EVENT_FANOUT_CAP) continue;
    for (const d of dispatchers) {
      for (const [h, registeredAt] of handlers) {
        if (d === h) continue; // no self-edges
        const key = `${d}>${h}`;
        if (seen.has(key)) continue;
        seen.add(key);
        edges.push({
          source: d,
          target: h,
          kind: 'calls',
          provenance: 'heuristic',
          metadata: { synthesizedBy: 'event-emitter', event, registeredAt },
        });
      }
    }
  }
  return edges;
}

/**
 * Phase 4: React class-component re-render. `this.setState(...)` re-runs the
 * component's `render()`, but that hop is React-internal — no static edge — so a
 * flow like "mutation → setState → canvas repaint" dead-ends at setState even
 * though `render → getRenderableElements → …` is fully call-connected after it.
 * Bridge it: for each class that has a `render` method, link every sibling method
 * whose body calls `this.setState(` → `render`. The setState gate keeps this to
 * React class components (a non-React class with a `render` method won't call
 * `this.setState`). Over-approximation (all setState methods reach render) is
 * accepted — it's reachability-correct, like the callback channels.
*/
function reactRenderEdges(queries: QueryBuilder, ctx: ResolutionContext): Edge[] {
  const synthesized: Edge[] = [];
  const emittedPairs = new Set<string>();

  for (const classNode of queries.getNodesByKind('class')) {
    // Collect the class's directly contained methods.
    const methods: Node[] = [];
    for (const containment of queries.getOutgoingEdges(classNode.id, ['contains'])) {
      const member = queries.getNodeById(containment.target);
      if (member && member.kind === 'method') methods.push(member);
    }

    const renderMethod = methods.find((candidate) => candidate.name === 'render');
    if (!renderMethod) continue;

    let linked = 0;
    for (const method of methods) {
      if (linked >= MAX_CALLBACKS_PER_CHANNEL) break;
      if (method.id === renderMethod.id) continue;

      // Only methods whose own source text calls `this.setState(` qualify.
      const fileText = ctx.readFile(method.filePath);
      const body = fileText ? sliceLines(fileText, method.startLine, method.endLine) : null;
      if (!body || !SETSTATE_RE.test(body)) continue;

      const pairKey = `${method.id}>${renderMethod.id}`;
      if (emittedPairs.has(pairKey)) continue;
      emittedPairs.add(pairKey);

      synthesized.push({
        source: method.id,
        target: renderMethod.id,
        kind: 'calls',
        line: method.startLine,
        provenance: 'heuristic',
        metadata: {
          synthesizedBy: 'react-render',
          via: 'setState',
          registeredAt: `${renderMethod.filePath}:${renderMethod.startLine}`,
        },
      });
      linked += 1;
    }
  }

  return synthesized;
}

/**
 * Phase 4b: Flutter setState → build (the Dart analog of react-render). In a
 * StatefulWidget's State class, `setState(() {…})` re-runs `build(context)`, but
 * that hop is framework-internal (Flutter calls build), so a flow like
 * "onPressed → _increment → setState → rebuilt UI" dead-ends at setState. Bridge
 * it: for each Dart class with a `build` method, link every sibling method whose
 * body calls `setState(` → `build`. The setState gate + `.dart` file keep this to
 * Flutter State classes. Over-approximation accepted (reachability-correct).
*/
function flutterBuildEdges(queries: QueryBuilder, ctx: ResolutionContext): Edge[] {
  const out: Edge[] = [];
  const dedupe = new Set<string>();

  for (const klass of queries.getNodesByKind('class')) {
    // Methods directly contained by this class.
    const memberMethods: Node[] = queries
      .getOutgoingEdges(klass.id, ['contains'])
      .flatMap((edge) => {
        const member = queries.getNodeById(edge.target);
        return member && member.kind === 'method' ? [member] : [];
      });

    const buildMethod = memberMethods.find((node) => node.name === 'build');
    if (!buildMethod) continue;
    // Dart-only gate: keyed on the file that holds `build`.
    if (!buildMethod.filePath.endsWith('.dart')) continue;

    const buildSite = `${buildMethod.filePath}:${buildMethod.startLine}`;
    let emitted = 0;
    for (const sibling of memberMethods) {
      if (emitted >= MAX_CALLBACKS_PER_CHANNEL) break;
      if (sibling.id === buildMethod.id) continue;

      const text = ctx.readFile(sibling.filePath);
      if (!text) continue;
      const body = sliceLines(text, sibling.startLine, sibling.endLine);
      if (!body || !FLUTTER_SETSTATE_RE.test(body)) continue;

      const pair = `${sibling.id}>${buildMethod.id}`;
      if (dedupe.has(pair)) continue;
      dedupe.add(pair);

      out.push({
        source: sibling.id,
        target: buildMethod.id,
        kind: 'calls',
        line: sibling.startLine,
        provenance: 'heuristic',
        metadata: { synthesizedBy: 'flutter-build', via: 'setState', registeredAt: buildSite },
      });
      emitted++;
    }
  }

  return out;
}

/**
 * Phase 4c: C++ virtual override. A call through a base/interface pointer
 * (`db->Get(...)`, `iter->Next()`) dispatches at runtime to a subclass override,
 * but that hop is a vtable indirection — no static call edge — so a flow stops at
 * the abstract base method. Bridge it like react-render: for each C++ class that
 * `extends` a base, link each base method → the subclass method of the same name
 * (the override), so trace/callees from the interface method reach the
 * implementation(s). Over-approximation accepted (reachability-correct); capped
 * per class and gated to C++ to avoid touching other languages' dispatch.
*/
// Overloads: base methods are grouped by name (not keyed one-per-name), so every
// base overload is linked to every same-name subclass method. Keying by name
// alone would keep only the last base overload and miss the one a call
// actually resolved to.
function cppOverrideEdges(queries: QueryBuilder): Edge[] {
  const edges: Edge[] = [];
  const seen = new Set<string>(); // `${baseMethodId}>${overrideId}` pairs already emitted

  // Methods directly contained by a class node.
  const methodsOf = (classId: string): Node[] =>
    queries
      .getOutgoingEdges(classId, ['contains'])
      .map((e) => queries.getNodeById(e.target))
      .filter((n): n is Node => !!n && n.kind === 'method');

  for (const cls of queries.getNodesByKind('class')) {
    const subMethods = methodsOf(cls.id).filter((n) => n.language === 'cpp');
    if (subMethods.length === 0) continue;

    for (const ext of queries.getOutgoingEdges(cls.id, ['extends'])) {
      const base = queries.getNodeById(ext.target);
      if (!base || base.language !== 'cpp' || base.id === cls.id) continue;

      const baseByName = new Map<string, Node[]>();
      for (const bm of methodsOf(base.id)) {
        const group = baseByName.get(bm.name);
        if (group) group.push(bm);
        else baseByName.set(bm.name, [bm]);
      }

      let added = 0; // cap applies per (subclass, base) pairing
      for (const m of subMethods) {
        if (added >= MAX_CALLBACKS_PER_CHANNEL) break;
        for (const bm of baseByName.get(m.name) ?? []) {
          if (added >= MAX_CALLBACKS_PER_CHANNEL) break;
          if (bm.id === m.id) continue;
          const key = `${bm.id}>${m.id}`;
          if (seen.has(key)) continue;
          seen.add(key);
          edges.push({
            source: bm.id,
            target: m.id,
            kind: 'calls',
            line: bm.startLine,
            provenance: 'heuristic',
            metadata: { synthesizedBy: 'cpp-override', via: m.name, registeredAt: `${m.filePath}:${m.startLine}` },
          });
          added++;
        }
      }
    }
  }
  return edges;
}

/**
 * Phase 5.5: interface / abstract dispatch (Java, Kotlin). A call through an
 * injected interface (`@Autowired FooService svc; svc.list()`) or an abstract
 * base dispatches at runtime to the implementing class's override — a vtable
 * indirection with no static call edge — so a request→service flow stops at the
 * interface method. Bridge it like cpp-override: for each class that
 * `implements` an interface (or `extends` an abstract base), link each
 * base/interface method → the class's same-name method (the override) so
 * trace/callees reach the implementation. Over-approximation accepted
 * (reachability-correct); capped per class, gated to JVM languages.
*/
const IFACE_OVERRIDE_LANGS = new Set(['java', 'kotlin']);

function interfaceOverrideEdges(queries: QueryBuilder): Edge[] {
  const result: Edge[] = [];
  const emitted = new Set<string>();

  // Methods directly contained by a class/interface node.
  const collectMethods = (ownerId: string): Node[] => {
    const found: Node[] = [];
    for (const link of queries.getOutgoingEdges(ownerId, ['contains'])) {
      const member = queries.getNodeById(link.target);
      if (member && member.kind === 'method') found.push(member);
    }
    return found;
  };

  for (const cls of queries.getNodesByKind('class')) {
    const overrides = collectMethods(cls.id).filter((method) => IFACE_OVERRIDE_LANGS.has(method.language));
    if (overrides.length === 0) continue;

    // Bucket the class's methods by name so OVERLOADS survive: an interface
    // `list()` and `list(params)` are distinct nodes and a call may resolve to
    // either, so every base overload is linked to every same-name impl overload.
    // The buckets depend only on the class, so they are built once per class.
    const overridesByName = new Map<string, Node[]>();
    for (const method of overrides) {
      const bucket = overridesByName.get(method.name);
      if (bucket === undefined) overridesByName.set(method.name, [method]);
      else bucket.push(method);
    }

    for (const superEdge of queries.getOutgoingEdges(cls.id, ['implements', 'extends'])) {
      const parent = queries.getNodeById(superEdge.target);
      if (!parent) continue;
      if (!IFACE_OVERRIDE_LANGS.has(parent.language) || parent.id === cls.id) continue;

      let linked = 0;
      for (const parentMethod of collectMethods(parent.id)) {
        if (linked >= MAX_CALLBACKS_PER_CHANNEL) break;
        const sameName = overridesByName.get(parentMethod.name) ?? [];
        for (const impl of sameName) {
          if (linked >= MAX_CALLBACKS_PER_CHANNEL) break;
          if (parentMethod.id === impl.id) continue;
          const pairKey = `${parentMethod.id}>${impl.id}`;
          if (emitted.has(pairKey)) continue;
          emitted.add(pairKey);
          result.push({
            source: parentMethod.id,
            target: impl.id,
            kind: 'calls',
            line: parentMethod.startLine,
            provenance: 'heuristic',
            metadata: {
              synthesizedBy: 'interface-impl',
              via: impl.name,
              registeredAt: `${impl.filePath}:${impl.startLine}`,
            },
          });
          linked += 1;
        }
      }
    }
  }

  return result;
}

/**
 * Go gRPC stub → impl bridge.
The protoc-gen-go-grpc codegen emits an
 * `UnimplementedXxxServer` struct in `*_grpc.pb.go` carrying one method
 * per service RPC; the real handler is a hand-written struct in another
 * file (`x/bank/keeper/msg_server.go::msgServer.Send` in cosmos-sdk).
 * Go's structural typing means no `implements` edge exists for our
 * resolver to follow, so `trace("Send","SendCoins")` lands on the
 * empty stub and reports "no path" (validated empirically — the cosmos
 * Q1 r1 trace failure that drove this work).
 *
 * Bridge: for each `UnimplementedXxxServer` whose RPC-method names are
 * a SUBSET of some other Go struct's method names, emit `calls` edges
 * `stub.method → impl.method` (paired by name). Excludes the gRPC
 * internal markers `mustEmbedUnimplementedXxxServer` and
 * `testEmbeddedByValue`, and skips candidate impls that themselves
 * live in a generated file (their `xxxClient` / sibling stubs would
 * otherwise look like impls).
 *
 * Multiple candidates are allowed — a service often has both a
 * production impl and one or more test mocks; linking to all preserves
 * trace utility without false-favoring. Edges are capped at
 * MAX_CALLBACKS_PER_CHANNEL per stub/candidate pair.
 *
 * Provenance: `heuristic`, `synthesizedBy: 'go-grpc-stub-impl'`. The
 * stub's source line is the wiring site shown in the trace trail.
 *
 * @param queries Graph query interface used to enumerate structs and their methods.
 * @returns The synthesized stub-method → impl-method `calls` edges.
 */
function goGrpcStubImplEdges(queries: QueryBuilder): Edge[] {
  const edges: Edge[] = [];
  const seen = new Set<string>(); // `${stubMethodId}>${implMethodId}` pairs already emitted
  const STUB_RE = /^Unimplemented.*Server$/;
  // gRPC internal-helper methods that appear on every Unimplemented*Server;
  // not part of the service contract, so exclude when computing the RPC-method
  // signature used to match impls.
  const isInternalMarker = (n: string) => n.startsWith('mustEmbed') || n === 'testEmbeddedByValue';

  // Methods directly contained by each Go struct, name-only. Built once.
  const methodNamesByStruct = new Map<string, Set<string>>();
  const methodNodesByStruct = new Map<string, Node[]>();
  const goStructs: Node[] = [];
  for (const s of queries.getNodesByKind('struct')) {
    if (s.language !== 'go') continue;
    goStructs.push(s);
    const ms = queries
      .getOutgoingEdges(s.id, ['contains'])
      .map((e) => queries.getNodeById(e.target))
      .filter((n): n is Node => !!n && n.kind === 'method');
    methodNodesByStruct.set(s.id, ms);
    methodNamesByStruct.set(s.id, new Set(ms.map((m) => m.name)));
  }

  for (const stub of goStructs) {
    if (!STUB_RE.test(stub.name)) continue;
    // The stub MUST live in a generated file — that's what tells us this is
    // a protoc-emitted scaffold rather than someone naming a struct
    // `UnimplementedXxxServer` by hand. Without this gate we'd also bridge
    // such hand-written structs and create misleading edges.
    if (!isGeneratedFile(stub.filePath)) continue;
    const stubMethods = (methodNodesByStruct.get(stub.id) ?? []).filter(
      (m) => !isInternalMarker(m.name),
    );
    if (stubMethods.length === 0) continue;
    const stubMethodNames = stubMethods.map((m) => m.name);

    for (const cand of goStructs) {
      if (cand.id === stub.id) continue;
      // Skip generated-file candidates — they're siblings (msgClient,
      // UnsafeMsgServer, …) whose method sets coincidentally match.
      if (isGeneratedFile(cand.filePath)) continue;
      const candNames = methodNamesByStruct.get(cand.id);
      if (!candNames) continue;
      // Subset: every RPC method must exist on the candidate by name.
      // Signature-level match would tighten this further, but name-match
      // alone already gives one-to-one pairing in real codebases because
      // gRPC method-name sets are highly distinctive (Send + MultiSend +
      // UpdateParams + SetSendEnabled is unique to bank's MsgServer).
      if (!stubMethodNames.every((n) => candNames.has(n))) continue;

      const candMethods = methodNodesByStruct.get(cand.id) ?? [];
      let added = 0; // cap applies per (stub, candidate) pair
      for (const sm of stubMethods) {
        if (added >= MAX_CALLBACKS_PER_CHANNEL) break;
        for (const cm of candMethods) {
          if (added >= MAX_CALLBACKS_PER_CHANNEL) break;
          if (cm.name !== sm.name) continue;
          const key = `${sm.id}>${cm.id}`;
          if (seen.has(key)) continue;
          seen.add(key);
          edges.push({
            source: sm.id,
            target: cm.id,
            kind: 'calls',
            line: sm.startLine,
            provenance: 'heuristic',
            metadata: {
              synthesizedBy: 'go-grpc-stub-impl',
              via: cm.name,
              registeredAt: `${cm.filePath}:${cm.startLine}`,
            },
          });
          added++;
        }
      }
    }
  }
  return edges;
}

/**
 * Phase 5: React JSX child rendering. A component that returns `<Child />`
 * mounts Child — React calls it — but JSX instantiation isn't a static call edge,
 * so a render tree (App.render → StaticCanvas → renderStaticScene) breaks at the
 * JSX hop. Link parent → each capitalized JSX child it renders. File-oriented
 * (read each JSX file once). Precision gate: the child name must resolve to a
 * component/function/class node — TS generics like `Array<Foo>` resolve to a type
 * (or nothing) and are dropped.
*/
// Returns parent → child `calls` edges for every capitalized JSX tag found in a
// function-like node's source, at most MAX_JSX_CHILDREN per parent.
function reactJsxChildEdges(ctx: ResolutionContext): Edge[] {
  const edges: Edge[] = [];
  const seen = new Set<string>(); // `${parentId}>${childId}` pairs already emitted
  const PARENT_KINDS = new Set(['method', 'function', 'component']);

  for (const file of ctx.getAllFiles()) {
    const content = ctx.readFile(file);
    // JSX-file gate: a file with neither a closing tag (`</`) nor a self-closing
    // tag (`/>`) cannot render JSX children. (Testing for the empty string would
    // always succeed and let every file through.)
    if (!content || (!content.includes('</') && !content.includes('/>'))) continue;

    const parents = ctx.getNodesInFile(file).filter((n) => PARENT_KINDS.has(n.kind));
    for (const parent of parents) {
      const src = sliceLines(content, parent.startLine, parent.endLine);
      // Same gate at function granularity.
      if (!src || (!src.includes('</') && !src.includes('/>'))) continue;

      const names = new Set<string>();
      JSX_TAG_RE.lastIndex = 0; // shared global regex: always restart from the top
      let m: RegExpExecArray | null;
      while ((m = JSX_TAG_RE.exec(src))) names.add(m[1]!);

      let added = 0;
      for (const name of names) {
        if (added >= MAX_JSX_CHILDREN) break;
        // Precision gate: the tag name must resolve to something renderable.
        const child = ctx.getNodesByName(name).find(
          (n) => n.kind === 'component' || n.kind === 'function' || n.kind === 'class'
        );
        if (!child || child.id === parent.id) continue;
        const key = `${parent.id}>${child.id}`;
        if (seen.has(key)) continue;
        seen.add(key);
        edges.push({
          source: parent.id,
          target: child.id,
          kind: 'calls',
          line: parent.startLine,
          provenance: 'heuristic',
          metadata: { synthesizedBy: 'jsx-render', via: name },
        });
        added++;
      }
    }
  }
  return edges;
}

/**
 * Phase 6: Vue SFC templates. The `.vue` extractor only parses `