index.ts 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876
  1. /**
  2. * Reference Resolution Orchestrator
  3. *
  4. * Coordinates all reference resolution strategies.
  5. */
  6. import * as fs from 'fs';
  7. import * as path from 'path';
  8. import { Node, UnresolvedReference, Edge } from '../types';
  9. import { QueryBuilder } from '../db/queries';
  10. import {
  11. UnresolvedRef,
  12. ResolvedRef,
  13. ResolutionResult,
  14. ResolutionContext,
  15. FrameworkResolver,
  16. ImportMapping,
  17. } from './types';
  18. import { matchReference } from './name-matcher';
  19. import { resolveViaImport, extractImportMappings, extractReExports } from './import-resolver';
  20. import { detectFrameworks } from './frameworks';
  21. import { synthesizeCallbackEdges } from './callback-synthesizer';
  22. import { loadProjectAliases, type AliasMap } from './path-aliases';
  23. import { logDebug } from '../errors';
  24. import type { ReExport } from './types';
  25. import { LRUCache } from './lru-cache';
  /**
   * Cache size limits. Each per-resolver cache is bounded so memory
   * stays flat on large codebases (20k+ files). Sizes were chosen to
   * cover the working set for typical resolution batches without
   * exceeding a few hundred MB worst-case. Override via the env var
   * `CODEGRAPH_RESOLVER_CACHE_SIZE` (single integer applied to all
   * caches) when tuning for very large or very small projects.
   */
  const DEFAULT_CACHE_LIMIT = 5_000;

  /**
   * Read the cache-size override from `CODEGRAPH_RESOLVER_CACHE_SIZE`.
   *
   * @returns The override when it is a positive base-10 integer, otherwise
   *   `DEFAULT_CACHE_LIMIT`.
   */
  function resolveCacheLimit(): number {
    const raw = process.env.CODEGRAPH_RESOLVER_CACHE_SIZE?.trim();
    if (!raw) return DEFAULT_CACHE_LIMIT;

    // Accept plain decimal digits only. `parseInt` stops at the first
    // non-digit, so values such as "1e6" or "10abc" would otherwise be
    // silently truncated to 1 and 10 — a misconfiguration that is better
    // answered with the default than with a near-empty cache.
    if (!/^\d+$/.test(raw)) return DEFAULT_CACHE_LIMIT;

    const parsed = Number(raw);
    return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : DEFAULT_CACHE_LIMIT;
  }
  42. // Re-export types
  43. export * from './types';
  // Lookup tables used to recognise built-in / external symbols. They are
  // built once at module load and shared by every resolver instance; the
  // Sets give O(1) membership checks.

  /** Global identifiers available in JavaScript/TypeScript runtimes. */
  const JS_BUILT_INS: ReadonlySet<string> = new Set([
    // host / global objects
    'console', 'window', 'document', 'global', 'process',
    // standard constructors and namespaces
    'Promise', 'Array', 'Object', 'String', 'Number', 'Boolean',
    'Date', 'Math', 'JSON', 'RegExp', 'Error', 'Map', 'Set',
    // timers
    'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
    // networking and CommonJS module scope
    'fetch',
    'require', 'module', 'exports', '__dirname', '__filename',
  ]);

  /** Hooks exported by React itself. */
  const REACT_HOOKS: ReadonlySet<string> = new Set([
    'useState',
    'useEffect',
    'useContext',
    'useReducer',
    'useCallback',
    'useMemo',
    'useRef',
    'useLayoutEffect',
    'useImperativeHandle',
    'useDebugValue',
  ]);

  /** Python built-in functions, conventional receiver names and literals. */
  const PYTHON_BUILT_INS: ReadonlySet<string> = new Set([
    // functions and type constructors
    'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
    'open', 'input', 'type', 'isinstance', 'hasattr', 'getattr', 'setattr',
    'super',
    // conventional receivers
    'self', 'cls',
    // literals
    'None', 'True', 'False',
  ]);

  /** Python built-in types; a dotted call on one of these is never a project symbol. */
  const PYTHON_BUILT_IN_TYPES: ReadonlySet<string> = new Set([
    'list', 'dict', 'set', 'tuple',
    'str', 'int', 'float', 'bool',
    'bytes', 'bytearray', 'frozenset',
    'object', 'super',
  ]);

  /** Method names of Python's built-in containers, strings and file objects. */
  const PYTHON_BUILT_IN_METHODS: ReadonlySet<string> = new Set([
    // list
    'append', 'extend', 'insert', 'remove', 'pop', 'clear', 'sort', 'reverse', 'copy',
    // dict
    'update', 'keys', 'values', 'items', 'get',
    // set
    'add', 'discard', 'union', 'intersection', 'difference',
    // str
    'split', 'join', 'strip', 'lstrip', 'rstrip', 'replace', 'lower', 'upper',
    'startswith', 'endswith', 'find', 'index', 'count', 'encode', 'decode',
    'format', 'isdigit', 'isalpha', 'isalnum',
    // file objects
    'read', 'write', 'readline', 'readlines', 'close', 'flush', 'seek',
  ]);

  /** Package qualifiers treated as external in Go code (`fmt.Println`, …). */
  const GO_STDLIB_PACKAGES: ReadonlySet<string> = new Set([
    'fmt', 'os', 'io', 'net', 'http', 'log', 'math', 'sort', 'sync',
    'time', 'path', 'bytes', 'strings', 'strconv', 'errors', 'context',
    'json', 'xml', 'csv', 'html', 'template', 'regexp', 'reflect',
    'runtime', 'testing', 'flag', 'bufio', 'crypto', 'encoding',
    'filepath', 'hash', 'mime', 'rand', 'signal', 'sql', 'syscall',
    'unicode', 'unsafe', 'atomic', 'binary', 'debug', 'exec', 'heap',
    'ring', 'scanner', 'tar', 'zip', 'gzip', 'zlib', 'tls', 'url',
    'user', 'pprof', 'trace', 'ast', 'build', 'parser', 'printer',
    'token', 'types', 'cgo', 'plugin', 'race', 'ioutil',
    // Import aliases common in Kubernetes code; not part of the Go standard
    // library, but treated as external in the same way.
    'utilruntime', 'utilwait', 'utilnet',
  ]);

  /** Go predeclared functions, constants and types. */
  const GO_BUILT_INS: ReadonlySet<string> = new Set([
    // functions
    'make', 'new', 'len', 'cap', 'append', 'copy', 'delete', 'close',
    'panic', 'recover', 'print', 'println', 'complex', 'real', 'imag',
    // constants and the error interface
    'error', 'nil', 'true', 'false', 'iota',
    // numeric types
    'int', 'int8', 'int16', 'int32', 'int64',
    'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'uintptr',
    'float32', 'float64', 'complex64', 'complex128',
    // remaining predeclared types
    'string', 'bool', 'byte', 'rune', 'any',
  ]);

  /** Name prefixes of Pascal/Delphi library units (matched with `startsWith`). */
  const PASCAL_UNIT_PREFIXES: readonly string[] = [
    'System.', 'Winapi.', 'Vcl.', 'Fmx.', 'Data.', 'Datasnap.',
    'Soap.', 'Xml.', 'Web.', 'REST.', 'FireDAC.', 'IBX.',
    'IdHTTP', 'IdTCP', 'IdSSL',
  ];

  /** Pascal/Delphi units, routines, keywords and base types treated as built-in. */
  const PASCAL_BUILT_INS: ReadonlySet<string> = new Set([
    // units
    'System', 'SysUtils', 'Classes', 'Types', 'Variants', 'StrUtils',
    'Math', 'DateUtils', 'IOUtils', 'Generics.Collections', 'Generics.Defaults',
    'Rtti', 'TypInfo', 'SyncObjs', 'RegularExpressions',
    'SysInit', 'Windows', 'Messages', 'Graphics', 'Controls', 'Forms',
    'Dialogs', 'StdCtrls', 'ExtCtrls', 'ComCtrls', 'Menus', 'ActnList',
    // routines
    'WriteLn', 'Write', 'ReadLn', 'Read', 'Inc', 'Dec', 'Ord', 'Chr',
    'Length', 'SetLength', 'High', 'Low', 'Assigned', 'FreeAndNil',
    'Format', 'IntToStr', 'StrToInt', 'FloatToStr', 'StrToFloat',
    'Trim', 'UpperCase', 'LowerCase', 'Pos', 'Copy', 'Delete', 'Insert',
    'Now', 'Date', 'Time', 'DateToStr', 'StrToDate',
    // control flow and literals
    'Raise', 'Exit', 'Break', 'Continue', 'Abort',
    'True', 'False', 'nil', 'Self', 'Result',
    // object lifecycle
    'Create', 'Destroy', 'Free',
    // base classes, exceptions and interfaces
    'TObject', 'TComponent', 'TPersistent', 'TInterfacedObject',
    'TList', 'TStringList', 'TStrings', 'TStream', 'TMemoryStream', 'TFileStream',
    'Exception', 'EAbort', 'EConvertError', 'EAccessViolation',
    'IInterface', 'IUnknown',
  ]);
  /**
   * Reference Resolver
   *
   * Orchestrates reference resolution using multiple strategies. For each
   * unresolved reference `resolveOne` tries, in order, the detected
   * framework resolvers, import-based resolution and name matching; the
   * `resolveAndPersist*` methods turn the results into edges and write them
   * through the `QueryBuilder`.
   */
  export class ReferenceResolver {
    private readonly projectRoot: string;
    private readonly queries: QueryBuilder;
    private readonly context: ResolutionContext;
    private frameworks: FrameworkResolver[] = [];

    // All per-resolver caches are LRU-bounded. Previously these were
    // unbounded Maps that grew with every distinct lookup and OOM'd on
    // codebases with 20k+ files (see issue: unbounded cache growth).
    private readonly nodeCache: LRUCache<string, Node[]>; // per-file node cache
    private readonly fileCache: LRUCache<string, string | null>; // per-file content cache (null = unreadable)
    private readonly importMappingCache: LRUCache<string, ImportMapping[]>;
    private readonly reExportCache: LRUCache<string, ReExport[]>;
    private readonly nameCache: LRUCache<string, Node[]>; // name → nodes cache
    private readonly lowerNameCache: LRUCache<string, Node[]>; // lower(name) → nodes cache
    private readonly qualifiedNameCache: LRUCache<string, Node[]>; // qualified_name → nodes cache

    private knownNames: Set<string> | null = null; // all known symbol names for fast pre-filtering
    private knownFiles: Set<string> | null = null; // all indexed file paths
    private cachesWarmed = false;

    // tsconfig/jsconfig path-alias map. `undefined` = not yet computed,
    // `null` = computed and absent. Treated as immutable for the
    // resolver's lifetime; callers re-create the resolver if config changes.
    private projectAliases: AliasMap | null | undefined = undefined;

    /**
     * @param projectRoot Directory that relative file paths are joined onto.
     * @param queries Database access layer used for every node/edge lookup.
     */
    constructor(projectRoot: string, queries: QueryBuilder) {
      this.projectRoot = projectRoot;
      this.queries = queries;

      const limit = resolveCacheLimit();
      // The content cache is heavier (full file text), so we give it a
      // smaller budget than the metadata caches.
      const contentLimit = Math.max(64, Math.floor(limit / 5));

      this.nodeCache = new LRUCache(limit);
      this.fileCache = new LRUCache(contentLimit);
      this.importMappingCache = new LRUCache(limit);
      this.reExportCache = new LRUCache(limit);
      this.nameCache = new LRUCache(limit);
      this.lowerNameCache = new LRUCache(limit);
      this.qualifiedNameCache = new LRUCache(limit);

      this.context = this.createContext();
    }

    /**
     * Initialize the resolver: detect frameworks, then drop whatever the
     * detection pass left in the caches.
     */
    initialize(): void {
      this.frameworks = detectFrameworks(this.context);
      this.clearCaches();
    }

    /**
     * Run each framework resolver's cross-file finalization pass and persist
     * the returned node updates. Idempotent — safe to call after every indexAll
     * and every incremental sync.
     *
     * Caches are cleared before the pass so it sees fresh DB state, and again
     * afterwards (when anything changed) so downstream queries see the
     * updated names. A framework whose pass throws is logged and skipped.
     *
     * @returns The number of nodes updated.
     */
    runPostExtract(): number {
      let updated = 0;
      this.clearCaches();

      for (const fw of this.frameworks) {
        if (!fw.postExtract) continue;
        try {
          for (const node of fw.postExtract(this.context)) {
            this.queries.updateNode(node);
            updated++;
          }
        } catch (err) {
          logDebug(`Framework '${fw.name}' postExtract failed`, {
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }

      if (updated > 0) this.clearCaches();
      return updated;
    }

    /**
     * Pre-build lightweight caches for resolution.
     *
     * Node lookups are handled by indexed SQLite queries instead of loading
     * all nodes into memory (which caused OOM on large codebases). Only two
     * string sets are cached here: every indexed file path and every
     * distinct symbol name. No-op when already warmed.
     */
    warmCaches(): void {
      if (this.cachesWarmed) return;
      this.knownFiles = new Set(this.queries.getAllFilePaths());
      this.knownNames = new Set(this.queries.getAllNodeNames());
      this.cachesWarmed = true;
    }

    /**
     * Clear internal caches, including the warmed name/file sets. The
     * path-alias map is intentionally kept (see `projectAliases`).
     */
    clearCaches(): void {
      this.nodeCache.clear();
      this.fileCache.clear();
      this.importMappingCache.clear();
      this.reExportCache.clear();
      this.nameCache.clear();
      this.lowerNameCache.clear();
      this.qualifiedNameCache.clear();
      this.knownNames = null;
      this.knownFiles = null;
      this.cachesWarmed = false;
    }

    /**
     * Create the resolution context handed to every strategy. Each accessor
     * is a thin, cache-backed wrapper around the query layer or filesystem.
     */
    private createContext(): ResolutionContext {
      // Shared by getImportMappings/getReExports below. A cached `null`
      // means an earlier read failed, so failures are not retried.
      const readFile = (filePath: string): string | null => {
        const cached = this.fileCache.get(filePath);
        if (cached !== undefined) return cached;

        let content: string | null;
        try {
          content = fs.readFileSync(path.join(this.projectRoot, filePath), 'utf-8');
        } catch (error) {
          logDebug('Failed to read file for resolution', { filePath, error: String(error) });
          content = null;
        }
        this.fileCache.set(filePath, content);
        return content;
      };

      return {
        getNodesInFile: (filePath: string) => {
          const cached = this.nodeCache.get(filePath);
          if (cached !== undefined) return cached;
          const nodes = this.queries.getNodesByFile(filePath);
          this.nodeCache.set(filePath, nodes);
          return nodes;
        },

        getNodesByName: (name: string) => {
          const cached = this.nameCache.get(name);
          if (cached !== undefined) return cached;
          const nodes = this.queries.getNodesByName(name);
          this.nameCache.set(name, nodes);
          return nodes;
        },

        getNodesByQualifiedName: (qualifiedName: string) => {
          const cached = this.qualifiedNameCache.get(qualifiedName);
          if (cached !== undefined) return cached;
          const nodes = this.queries.getNodesByQualifiedNameExact(qualifiedName);
          this.qualifiedNameCache.set(qualifiedName, nodes);
          return nodes;
        },

        getNodesByKind: (kind: Node['kind']) => this.queries.getNodesByKind(kind),

        fileExists: (filePath: string) => {
          // Check the pre-built known-files set first (O(1)), accepting
          // either the path as given or its forward-slash form.
          if (this.knownFiles) {
            const normalized = filePath.replace(/\\/g, '/');
            if (this.knownFiles.has(filePath) || this.knownFiles.has(normalized)) {
              return true;
            }
          }
          // Fall back to the filesystem for files not yet indexed.
          const fullPath = path.join(this.projectRoot, filePath);
          try {
            return fs.existsSync(fullPath);
          } catch (error) {
            logDebug('Error checking file existence', { filePath, error: String(error) });
            return false;
          }
        },

        readFile,

        getProjectRoot: () => this.projectRoot,

        getAllFiles: () => this.queries.getAllFilePaths(),

        listDirectories: (relativePath: string) => {
          const target =
            relativePath === '.' || relativePath === ''
              ? this.projectRoot
              : path.join(this.projectRoot, relativePath);
          try {
            return fs
              .readdirSync(target, { withFileTypes: true })
              .filter((entry) => entry.isDirectory())
              .map((entry) => entry.name);
          } catch (error) {
            logDebug('Failed to list directory for resolution', {
              relativePath,
              error: String(error),
            });
            return [];
          }
        },

        getNodesByLowerName: (lowerName: string) => {
          const cached = this.lowerNameCache.get(lowerName);
          if (cached !== undefined) return cached;
          const nodes = this.queries.getNodesByLowerName(lowerName);
          this.lowerNameCache.set(lowerName, nodes);
          return nodes;
        },

        getImportMappings: (filePath: string, language) => {
          const cached = this.importMappingCache.get(filePath);
          if (cached !== undefined) return cached;
          const content = readFile(filePath);
          // Unreadable or empty files have no imports; cache that too.
          const mappings = content ? extractImportMappings(filePath, content, language) : [];
          this.importMappingCache.set(filePath, mappings);
          return mappings;
        },

        getProjectAliases: () => {
          if (this.projectAliases === undefined) {
            this.projectAliases = loadProjectAliases(this.projectRoot);
          }
          return this.projectAliases;
        },

        getReExports: (filePath: string, language) => {
          const cached = this.reExportCache.get(filePath);
          if (cached !== undefined) return cached;
          const content = readFile(filePath);
          const reExports = content ? extractReExports(content, language) : [];
          this.reExportCache.set(filePath, reExports);
          return reExports;
        },
      };
    }

    /**
     * Resolve all unresolved references.
     *
     * @param unresolvedRefs References to resolve. `filePath`/`language` are
     *   taken from the reference when present and looked up from the owning
     *   node otherwise.
     * @param onProgress Optional callback, invoked at most once per whole
     *   percent of progress plus one final `(total, total)` call.
     * @returns Resolved and unresolved references plus per-method counts.
     */
    resolveAll(
      unresolvedRefs: UnresolvedReference[],
      onProgress?: (current: number, total: number) => void
    ): ResolutionResult {
      // Warm the lightweight name/file sets used for pre-filtering.
      this.warmCaches();

      const resolved: ResolvedRef[] = [];
      const unresolved: UnresolvedRef[] = [];
      const byMethod: Record<string, number> = {};

      // Convert to our internal format, using denormalized fields when available
      const refs: UnresolvedRef[] = unresolvedRefs.map((ref) => ({
        fromNodeId: ref.fromNodeId,
        referenceName: ref.referenceName,
        referenceKind: ref.referenceKind,
        line: ref.line,
        column: ref.column,
        filePath: ref.filePath || this.getFilePathFromNodeId(ref.fromNodeId),
        language: ref.language || this.getLanguageFromNodeId(ref.fromNodeId),
      }));

      const total = refs.length;
      let lastReportedPercent = -1;

      for (const [i, ref] of refs.entries()) {
        const result = this.resolveOne(ref);
        if (result) {
          resolved.push(result);
          byMethod[result.resolvedBy] = (byMethod[result.resolvedBy] || 0) + 1;
        } else {
          unresolved.push(ref);
        }

        // Report progress every 1% to avoid too many updates
        if (onProgress) {
          const currentPercent = Math.floor((i / total) * 100);
          if (currentPercent > lastReportedPercent) {
            lastReportedPercent = currentPercent;
            onProgress(i + 1, total);
          }
        }
      }

      // Final progress report
      if (onProgress && total > 0) {
        onProgress(total, total);
      }

      return {
        resolved,
        unresolved,
        stats: {
          total,
          resolved: resolved.length,
          unresolved: unresolved.length,
          byMethod,
        },
      };
    }

    /**
     * Check if a reference name has any possible match in the codebase.
     * Uses the pre-built knownNames set to skip expensive resolution
     * for names that definitely don't exist as symbols. Returns `true`
     * (no filtering) when the set has not been built.
     */
    private hasAnyPossibleMatch(name: string): boolean {
      const known = this.knownNames;
      if (!known) return true; // no pre-filter available

      // Direct name match
      if (known.has(name)) return true;

      // For qualified names like "obj.method", check both parts
      const dotIdx = name.indexOf('.');
      if (dotIdx > 0) {
        const receiver = name.substring(0, dotIdx);
        const member = name.substring(dotIdx + 1);
        if (known.has(receiver) || known.has(member)) return true;
        // Also check capitalized receiver (instance-method resolution)
        const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1);
        if (known.has(capitalized)) return true;
      }

      // Same for "Class::method"
      const colonIdx = name.indexOf('::');
      if (colonIdx > 0) {
        const receiver = name.substring(0, colonIdx);
        const member = name.substring(colonIdx + 2);
        if (known.has(receiver) || known.has(member)) return true;
      }

      // For path-like references (e.g., "snippets/drawer-menu.liquid"), check the filename
      const slashIdx = name.lastIndexOf('/');
      if (slashIdx > 0 && known.has(name.substring(slashIdx + 1))) return true;

      return false;
    }

    /**
     * Does `ref.referenceName` match an import declared in its containing
     * file? Used as a pre-filter escape so re-export chain resolution
     * still gets a chance when the name has no project-wide declaration.
     * Matches either the imported local name itself or a member access on it.
     */
    private matchesAnyImport(ref: UnresolvedRef): boolean {
      const name = ref.referenceName;
      return this.context
        .getImportMappings(ref.filePath, ref.language)
        .some((imp) => imp.localName === name || name.startsWith(imp.localName + '.'));
    }

    /**
     * Resolve a single reference.
     *
     * @returns The winning candidate, or `null` when the reference is a
     *   built-in/external symbol, is rejected by the pre-filter, or no
     *   strategy produced a candidate. A candidate with confidence >= 0.9
     *   from the framework or import strategy is returned immediately;
     *   otherwise the highest-confidence candidate wins (earliest on ties).
     */
    resolveOne(ref: UnresolvedRef): ResolvedRef | null {
      // Both the built-in check and the pre-filter consult `knownNames`.
      // Warm it here (idempotent) so a direct call behaves like a call made
      // through `resolveAll`; with the set missing, Python names that merely
      // collide with a builtin method would all be dropped as built-ins.
      this.warmCaches();

      // Skip built-in/external references
      if (this.isBuiltInOrExternal(ref)) {
        return null;
      }

      // Fast pre-filter: skip if no symbol with this name exists anywhere
      // AND the name doesn't match a local import. The import escape is
      // necessary because re-export rename chains (`import { login }
      // from './barrel'` where the barrel has `export { signIn as login }
      // from './auth'`) intentionally call a name that has no
      // declaration anywhere — only the renamed upstream symbol does.
      // Frameworks may also claim a name outright.
      if (
        !this.hasAnyPossibleMatch(ref.referenceName) &&
        !this.matchesAnyImport(ref) &&
        !this.frameworks.some((f) => f.claimsReference?.(ref.referenceName))
      ) {
        return null;
      }

      const candidates: ResolvedRef[] = [];

      // Strategy 1: Try framework-specific resolution
      for (const framework of this.frameworks) {
        const result = framework.resolve(ref, this.context);
        if (!result) continue;
        if (result.confidence >= 0.9) return result; // High confidence, return immediately
        candidates.push(result);
      }

      // Strategy 2: Try import-based resolution
      const importResult = resolveViaImport(ref, this.context);
      if (importResult) {
        if (importResult.confidence >= 0.9) return importResult;
        candidates.push(importResult);
      }

      // Strategy 3: Try name matching
      const nameResult = matchReference(ref, this.context);
      if (nameResult) {
        candidates.push(nameResult);
      }

      if (candidates.length === 0) return null;

      // Return highest confidence candidate
      return candidates.reduce((best, curr) => (curr.confidence > best.confidence ? curr : best));
    }

    /**
     * Create edges from resolved references.
     *
     * The edge kind is the reference kind, with two promotions that need
     * the resolved target to decide (see inline comments).
     */
    createEdges(resolved: ResolvedRef[]): Edge[] {
      return resolved.map((ref) => {
        let kind = ref.original.referenceKind;

        if (kind === 'extends') {
          // Promote "extends" to "implements" when a class/struct targets an interface
          const targetNode = this.queries.getNodeById(ref.targetNodeId);
          if (targetNode && (targetNode.kind === 'interface' || targetNode.kind === 'protocol')) {
            const sourceNode = this.queries.getNodeById(ref.original.fromNodeId);
            if (sourceNode && sourceNode.kind !== 'interface' && sourceNode.kind !== 'protocol') {
              kind = 'implements';
            }
          }
        } else if (kind === 'calls') {
          // Promote "calls" to "instantiates" when the resolved target is a
          // class/struct. Languages without a `new` keyword (Python, Ruby)
          // express instantiation as `Foo()` — extraction can't tell that
          // apart from a function call without symbol info, but resolution
          // can: if `Foo` resolves to a class, the call IS an instantiation.
          const targetNode = this.queries.getNodeById(ref.targetNodeId);
          if (targetNode && (targetNode.kind === 'class' || targetNode.kind === 'struct')) {
            kind = 'instantiates';
          }
        }

        return {
          source: ref.original.fromNodeId,
          target: ref.targetNodeId,
          kind,
          line: ref.original.line,
          column: ref.original.column,
          metadata: {
            confidence: ref.confidence,
            resolvedBy: ref.resolvedBy,
          },
        };
      });
    }

    /**
     * Defense-in-depth: drop edges whose source or target is no longer in
     * the nodes table. PR #62 (issue #42) applied this filter at the
     * extraction-layer `insertEdges` site; #455 reports the same
     * `FOREIGN KEY constraint failed` reappearing here at the
     * resolution-layer site during watch sync, where a resolver lookup that
     * crosses a framework-specific cache can hand us a target whose node
     * was removed by a concurrent file rewrite. One batched
     * `getNodesByIds` query is enough to skip those edges quietly instead
     * of aborting the whole sync.
     */
    private filterEdgesByExistingNodes(edges: Edge[]): Edge[] {
      if (edges.length === 0) return edges;

      const ids = new Set<string>();
      for (const edge of edges) {
        ids.add(edge.source);
        ids.add(edge.target);
      }

      const existing = this.queries.getNodesByIds([...ids]);
      return edges.filter((edge) => existing.has(edge.source) && existing.has(edge.target));
    }

    /**
     * Remove the given references from the unresolved-refs table, keyed by
     * (fromNodeId, referenceName, referenceKind). No-op for an empty list.
     */
    private deleteRefs(
      refs: ReadonlyArray<Pick<UnresolvedRef, 'fromNodeId' | 'referenceName' | 'referenceKind'>>
    ): void {
      if (refs.length === 0) return;
      this.queries.deleteSpecificResolvedReferences(
        refs.map((r) => ({
          fromNodeId: r.fromNodeId,
          referenceName: r.referenceName,
          referenceKind: r.referenceKind,
        }))
      );
    }

    /**
     * Persist one set of resolved references: insert the edges whose
     * endpoints still exist, then delete the resolved refs from the
     * unresolved-refs table so metrics stay accurate.
     */
    private persistResolved(resolved: ResolvedRef[]): void {
      if (resolved.length === 0) return;

      const validEdges = this.filterEdgesByExistingNodes(this.createEdges(resolved));
      if (validEdges.length > 0) {
        this.queries.insertEdges(validEdges);
      }
      this.deleteRefs(resolved.map((r) => r.original));
    }

    /**
     * Resolve the given references and persist the resulting edges.
     * References that could not be resolved are left in the database.
     */
    resolveAndPersist(
      unresolvedRefs: UnresolvedReference[],
      onProgress?: (current: number, total: number) => void
    ): ResolutionResult {
      const result = this.resolveAll(unresolvedRefs, onProgress);
      this.persistResolved(result.resolved);
      return result;
    }

    /**
     * Resolve and persist in batches to keep memory bounded.
     *
     * Reads unresolved references from the database in chunks, persisting
     * edges and deleting every processed ref (resolved or not) after each
     * batch, then runs callback-edge synthesis once at the end.
     *
     * @param onProgress Called after each batch with (processed, total),
     *   where total is the unresolved-ref count at the start.
     * @param batchSize Maximum number of references read per batch.
     * @returns Aggregate stats only; `resolved`/`unresolved` are empty so the
     *   per-batch arrays are not accumulated.
     */
    async resolveAndPersistBatched(
      onProgress?: (current: number, total: number) => void,
      batchSize: number = 5000
    ): Promise<ResolutionResult> {
      this.warmCaches();

      const total = this.queries.getUnresolvedReferencesCount();
      let remaining = total;
      let processed = 0;
      const aggregateStats = {
        total: 0,
        resolved: 0,
        unresolved: 0,
        byMethod: {} as Record<string, number>,
      };

      // Process in batches. We always read from offset 0 because processed refs
      // are deleted after each batch, shifting the remaining rows forward.
      while (true) {
        const batch = this.queries.getUnresolvedReferencesBatch(0, batchSize);
        if (batch.length === 0) break;

        const result = this.resolveAll(batch);

        // Persist edges immediately and drop the resolved refs
        this.persistResolved(result.resolved);
        // Delete unresolvable refs from this batch to avoid re-processing them
        this.deleteRefs(result.unresolved);

        // Stall guard: if the table did not shrink, the next read at offset 0
        // would return the same rows forever. Measuring the row count (rather
        // than "nothing was resolved") matters because a batch made up
        // entirely of unresolvable refs is still progress — its rows were
        // just deleted — and must not end the run early.
        const remainingAfter = this.queries.getUnresolvedReferencesCount();
        const stalled = remainingAfter >= remaining;
        remaining = remainingAfter;

        // Aggregate stats
        aggregateStats.total += result.stats.total;
        aggregateStats.resolved += result.stats.resolved;
        aggregateStats.unresolved += result.stats.unresolved;
        for (const [method, count] of Object.entries(result.stats.byMethod)) {
          aggregateStats.byMethod[method] = (aggregateStats.byMethod[method] || 0) + count;
        }

        processed += batch.length;
        onProgress?.(processed, total);

        // Yield so progress UI can render between batches
        await new Promise((resolve) => setImmediate(resolve));

        if (stalled) break;
      }

      // Dynamic-edge synthesis: now that all base `calls` edges are persisted,
      // synthesize observer/callback dispatch edges (dispatcher → registered
      // callbacks) that static parsing leaves out. Best-effort — never fail the
      // index on it. See docs/design/callback-edge-synthesis.md.
      try {
        aggregateStats.byMethod['callback-synthesis'] = synthesizeCallbackEdges(
          this.queries,
          this.context
        );
      } catch (err) {
        // Synthesis is additive and optional: record the failure, keep going.
        logDebug('Callback edge synthesis failed', {
          error: err instanceof Error ? err.message : String(err),
        });
      }

      return {
        resolved: [],
        unresolved: [],
        stats: aggregateStats,
      };
    }

    /**
     * Get the names of the detected frameworks.
     */
    getDetectedFrameworks(): string[] {
      return this.frameworks.map((f) => f.name);
    }

    /**
     * Check if a reference is to a built-in or external symbol, using the
     * per-language lookup tables. Languages without a table are never
     * filtered here.
     */
    private isBuiltInOrExternal(ref: UnresolvedRef): boolean {
      const name = ref.referenceName;

      switch (ref.language) {
        case 'typescript':
        case 'javascript':
        case 'tsx':
        case 'jsx': {
          // JavaScript/TypeScript built-ins
          if (JS_BUILT_INS.has(name)) return true;
          // Common JS/TS library calls (console.log, Math.floor, JSON.parse)
          if (name.startsWith('console.') || name.startsWith('Math.') || name.startsWith('JSON.')) {
            return true;
          }
          // React hooks from React itself
          return REACT_HOOKS.has(name);
        }

        case 'python': {
          // Python built-ins (bare calls only — dotted calls like console.print are method calls)
          if (PYTHON_BUILT_INS.has(name)) return true;

          // Python built-in method calls (e.g., list.extend, dict.update)
          const dotIdx = name.indexOf('.');
          if (dotIdx > 0) {
            const receiver = name.substring(0, dotIdx);
            const method = name.substring(dotIdx + 1);

            // Filter calls on built-in types (list.append, dict.update, etc.)
            if (PYTHON_BUILT_IN_TYPES.has(receiver)) return true;

            // Filter built-in methods on non-class receivers
            // (e.g., items.append where items is a local list variable)
            // But allow if the capitalized receiver matches a known codebase class
            if (PYTHON_BUILT_IN_METHODS.has(method)) {
              const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1);
              if (!this.knownNames?.has(capitalized)) return true;
            }
          }

          // A bare name colliding with a builtin method (index, get, update, count…)
          // is only a builtin when NOTHING in the codebase declares it. A declared
          // symbol with that exact name — e.g. a Flask/FastAPI view `def index()` or
          // `def get()` — is a real reference target. Mirrors the knownNames guard on
          // the dotted branch above; without it, every handler named after a builtin
          // method silently loses its route→handler edge.
          return PYTHON_BUILT_IN_METHODS.has(name) && !this.knownNames?.has(name);
        }

        case 'go': {
          // Go standard library packages — refs like "fmt.Println", "http.ListenAndServe", etc.
          const dotIdx = name.indexOf('.');
          if (dotIdx > 0 && GO_STDLIB_PACKAGES.has(name.substring(0, dotIdx))) return true;
          return GO_BUILT_INS.has(name);
        }

        case 'pascal':
          // Pascal/Delphi built-ins and standard library units
          return PASCAL_UNIT_PREFIXES.some((p) => name.startsWith(p)) || PASCAL_BUILT_INS.has(name);

        default:
          return false;
      }
    }

    /**
     * Get file path from node ID ('' when the node is missing or has none).
     */
    private getFilePathFromNodeId(nodeId: string): string {
      const node = this.queries.getNodeById(nodeId);
      return node?.filePath || '';
    }

    /**
     * Get language from node ID ('unknown' when the node is missing or has none).
     */
    private getLanguageFromNodeId(nodeId: string): UnresolvedRef['language'] {
      const node = this.queries.getNodeById(nodeId);
      return node?.language || 'unknown';
    }
  }
  /**
   * Build a ready-to-use reference resolver: constructs the instance and
   * runs `initialize()` on it before handing it back.
   *
   * @param projectRoot Root directory of the project being resolved.
   * @param queries Query layer the resolver reads from and writes to.
   * @returns The initialized resolver.
   */
  export function createResolver(projectRoot: string, queries: QueryBuilder): ReferenceResolver {
    const instance = new ReferenceResolver(projectRoot, queries);
    instance.initialize();
    return instance;
  }