callback-synthesizer.ts 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /**
  2. * Callback / observer edge synthesis — Phase 1 + 2.
  3. *
  4. * Closes dynamic-dispatch holes where a dispatcher invokes callbacks registered
  5. * elsewhere. Two channel shapes:
  6. *
  7. * (1) Field-backed observer (Phase 1):
  8. * onUpdate(cb) { this.callbacks.add(cb); } // registrar
  9. * triggerUpdate() { for (cb of this.callbacks) cb(); } // dispatcher
  10. * scene.onUpdate(this.triggerRender) // registration
  11. * → synthesize triggerUpdate → triggerRender
  12. *
  13. * (2) String-keyed EventEmitter (Phase 2):
  14. * this.on('mount', function onmount(){...}) // registration
  15. * fn.emit('mount', this) // dispatch
  16. * → synthesize (method containing emit('mount')) → onmount
  17. *
  18. * Whole-graph pass after base resolution. High-precision/low-recall by design:
  19. * named callbacks only; field channels paired by file+field; EventEmitter
  20. * channels capped by event fan-out (generic names like 'error' skipped — they
  21. * need receiver-type matching, deferred to Phase 3). All synthesized edges are
  22. * tagged `provenance:'heuristic'`. See docs/design/callback-edge-synthesis.md.
  23. */
  24. import type { Edge, Node } from '../types';
  25. import type { QueryBuilder } from '../db/queries';
  26. import type { ResolutionContext } from './types';
  27. const REGISTRAR_NAME = /^(on[A-Z]\w*|subscribe|addListener|addEventListener|register|watch|listen|addCallback)$/;
  28. const DISPATCHER_NAME = /(emit|trigger|notify|dispatch|fire|publish|flush)/i;
  29. const MAX_CALLBACKS_PER_CHANNEL = 40;
  30. const EVENT_FANOUT_CAP = 6; // skip events with more handlers/dispatchers than this (too generic without type info)
  31. const ON_RE = /\.(?:on|once|addListener)\(\s*['"]([^'"]+)['"]\s*,\s*(?:function\s+(\w+)|(?:this\.)?(\w+))/g;
  32. const EMIT_RE = /\.(?:emit|fire|dispatchEvent)\(\s*['"]([^'"]+)['"]/g;
  33. function sliceLines(content: string, startLine?: number, endLine?: number): string | null {
  34. if (!startLine || !endLine) return null;
  35. return content.split('\n').slice(startLine - 1, endLine).join('\n');
  36. }
  37. function registrarField(src: string): string | null {
  38. const m = src.match(/this\.(\w+)\.(?:add|push|set)\(/);
  39. return m ? m[1]! : null;
  40. }
  41. function dispatcherField(src: string): string | null {
  42. const forOf = src.match(/\bof\s+(?:Array\.from\(\s*)?this\.(\w+)/);
  43. if (forOf && /\b\w+\s*\(/.test(src)) return forOf[1]!;
  44. const forEach = src.match(/this\.(\w+)\.forEach\(/);
  45. if (forEach) return forEach[1]!;
  46. return null;
  47. }
  48. const FN_KINDS = new Set(['method', 'function', 'component']);
  49. /** Innermost function/method node whose line range contains `line`. */
  50. function enclosingFn(nodesInFile: Node[], line: number): Node | null {
  51. let best: Node | null = null;
  52. for (const n of nodesInFile) {
  53. if (!FN_KINDS.has(n.kind)) continue;
  54. const end = n.endLine ?? n.startLine;
  55. if (n.startLine <= line && end >= line) {
  56. if (!best || n.startLine >= best.startLine) best = n; // prefer the tightest (latest-starting) encloser
  57. }
  58. }
  59. return best;
  60. }
  61. /** Phase 1: field-backed observer channels (registrar/dispatcher share a store). */
  62. function fieldChannelEdges(queries: QueryBuilder, ctx: ResolutionContext): Edge[] {
  63. const candidates = [...queries.getNodesByKind('method'), ...queries.getNodesByKind('function')];
  64. const registrars: Array<{ node: Node; field: string }> = [];
  65. const dispatchers: Array<{ node: Node; field: string }> = [];
  66. for (const m of candidates) {
  67. const isReg = REGISTRAR_NAME.test(m.name);
  68. const isDisp = DISPATCHER_NAME.test(m.name);
  69. if (!isReg && !isDisp) continue;
  70. const content = ctx.readFile(m.filePath);
  71. const src = content && sliceLines(content, m.startLine, m.endLine);
  72. if (!src) continue;
  73. if (isReg) { const f = registrarField(src); if (f) registrars.push({ node: m, field: f }); }
  74. if (isDisp) { const f = dispatcherField(src); if (f) dispatchers.push({ node: m, field: f }); }
  75. }
  76. const edges: Edge[] = [];
  77. const seen = new Set<string>();
  78. for (const reg of registrars) {
  79. const chDispatchers = dispatchers.filter(
  80. (d) => d.node.filePath === reg.node.filePath && d.field === reg.field
  81. );
  82. if (chDispatchers.length === 0) continue;
  83. const argRe = new RegExp(`${reg.node.name}\\s*\\(\\s*(?:this\\.)?(\\w+)`);
  84. let added = 0;
  85. for (const e of queries.getIncomingEdges(reg.node.id, ['calls'])) {
  86. if (added >= MAX_CALLBACKS_PER_CHANNEL) break;
  87. if (!e.line) continue;
  88. const caller = queries.getNodeById(e.source);
  89. if (!caller) continue;
  90. const line = ctx.readFile(caller.filePath)?.split('\n')[e.line - 1];
  91. const am = line?.match(argRe);
  92. if (!am) continue;
  93. const fn = ctx.getNodesByName(am[1]!).find((n) => n.kind === 'method' || n.kind === 'function');
  94. if (!fn) continue;
  95. for (const disp of chDispatchers) {
  96. if (disp.node.id === fn.id) continue;
  97. const key = `${disp.node.id}>${fn.id}`;
  98. if (seen.has(key)) continue;
  99. seen.add(key);
  100. edges.push({
  101. source: disp.node.id, target: fn.id, kind: 'calls', line: disp.node.startLine,
  102. provenance: 'heuristic',
  103. metadata: {
  104. synthesizedBy: 'callback', via: reg.node.name, field: reg.field,
  105. // Where the callback was wired up (`scene.onUpdate(this.triggerRender)`).
  106. // This is the #1 thing an agent reads/greps to explain the flow — surface
  107. // it so node/trace/context can show it without a callers() + Read round-trip.
  108. registeredAt: `${caller.filePath}:${e.line}`,
  109. },
  110. });
  111. added++;
  112. }
  113. }
  114. }
  115. return edges;
  116. }
  117. /** Phase 2: string-keyed EventEmitter channels (on('e', fn) ↔ emit('e')). */
  118. function eventEmitterEdges(ctx: ResolutionContext): Edge[] {
  119. const emitsByEvent = new Map<string, Set<string>>(); // event → dispatcher node ids
  120. const handlersByEvent = new Map<string, Map<string, string>>(); // event → handler id → registration site (file:line)
  121. for (const file of ctx.getAllFiles()) {
  122. const content = ctx.readFile(file);
  123. if (!content) continue;
  124. const hasEmit = content.includes('.emit(') || content.includes('.fire(') || content.includes('.dispatchEvent(');
  125. const hasOn = content.includes('.on(') || content.includes('.once(') || content.includes('.addListener(');
  126. if (!hasEmit && !hasOn) continue;
  127. const nodesInFile = ctx.getNodesInFile(file);
  128. const lineOf = (idx: number) => content.slice(0, idx).split('\n').length;
  129. if (hasEmit) {
  130. EMIT_RE.lastIndex = 0;
  131. let m: RegExpExecArray | null;
  132. while ((m = EMIT_RE.exec(content))) {
  133. const disp = enclosingFn(nodesInFile, lineOf(m.index));
  134. if (!disp) continue;
  135. const set = emitsByEvent.get(m[1]!) ?? new Set<string>();
  136. set.add(disp.id); emitsByEvent.set(m[1]!, set);
  137. }
  138. }
  139. if (hasOn) {
  140. ON_RE.lastIndex = 0;
  141. let m: RegExpExecArray | null;
  142. while ((m = ON_RE.exec(content))) {
  143. const handlerName = m[2] || m[3];
  144. if (!handlerName) continue;
  145. const handler = ctx.getNodesByName(handlerName).find((n) => n.kind === 'function' || n.kind === 'method');
  146. if (!handler) continue;
  147. const map = handlersByEvent.get(m[1]!) ?? new Map<string, string>();
  148. map.set(handler.id, `${file}:${lineOf(m.index)}`); handlersByEvent.set(m[1]!, map);
  149. }
  150. }
  151. }
  152. const edges: Edge[] = [];
  153. const seen = new Set<string>();
  154. for (const [event, dispatchers] of emitsByEvent) {
  155. const handlers = handlersByEvent.get(event);
  156. if (!handlers) continue;
  157. // Precision guard: a generic event name with many handlers/dispatchers can't
  158. // be matched without receiver-type info (Phase 3) — skip rather than over-link.
  159. if (dispatchers.size > EVENT_FANOUT_CAP || handlers.size > EVENT_FANOUT_CAP) continue;
  160. for (const d of dispatchers) for (const [h, registeredAt] of handlers) {
  161. if (d === h) continue;
  162. const key = `${d}>${h}`;
  163. if (seen.has(key)) continue;
  164. seen.add(key);
  165. edges.push({ source: d, target: h, kind: 'calls', provenance: 'heuristic', metadata: { synthesizedBy: 'event-emitter', event, registeredAt } });
  166. }
  167. }
  168. return edges;
  169. }
  170. /**
  171. * Synthesize dispatcher→callback edges (field observers + EventEmitters).
  172. * Returns the count added. Never throws into indexing — callers wrap in try/catch.
  173. */
  174. export function synthesizeCallbackEdges(queries: QueryBuilder, ctx: ResolutionContext): number {
  175. const fieldEdges = fieldChannelEdges(queries, ctx);
  176. const emitterEdges = eventEmitterEdges(ctx);
  177. const merged: Edge[] = [];
  178. const seen = new Set<string>();
  179. for (const e of [...fieldEdges, ...emitterEdges]) {
  180. const key = `${e.source}>${e.target}`;
  181. if (seen.has(key)) continue;
  182. seen.add(key);
  183. merged.push(e);
  184. }
  185. if (merged.length > 0) queries.insertEdges(merged);
  186. return merged.length;
  187. }