// index.ts
  1. /**
  2. * Reference Resolution Orchestrator
  3. *
  4. * Coordinates all reference resolution strategies.
  5. */
  6. import * as fs from 'fs';
  7. import * as path from 'path';
  8. import { Node, UnresolvedReference, Edge } from '../types';
  9. import { QueryBuilder } from '../db/queries';
  10. import {
  11. UnresolvedRef,
  12. ResolvedRef,
  13. ResolutionResult,
  14. ResolutionContext,
  15. FrameworkResolver,
  16. ImportMapping,
  17. } from './types';
  18. import { matchReference } from './name-matcher';
  19. import { resolveViaImport, extractImportMappings, extractReExports } from './import-resolver';
  20. import { detectFrameworks } from './frameworks';
  21. import { synthesizeCallbackEdges } from './callback-synthesizer';
  22. import { loadProjectAliases, type AliasMap } from './path-aliases';
  23. import { logDebug } from '../errors';
  24. import type { ReExport } from './types';
  25. import { LRUCache } from './lru-cache';
  /**
   * Default entry budget for each per-resolver cache.
   *
   * Every cache owned by the resolver is bounded so memory stays flat on
   * large codebases (20k+ files). The value was chosen to cover the working
   * set of a typical resolution batch without exceeding a few hundred MB in
   * the worst case. Set the env var `CODEGRAPH_RESOLVER_CACHE_SIZE` (one
   * integer applied to all caches) to tune for very large or very small
   * projects.
   */
  const DEFAULT_CACHE_LIMIT = 5_000;

  /**
   * Read the cache-size override from the environment.
   *
   * @returns The integer parsed (base 10) from `CODEGRAPH_RESOLVER_CACHE_SIZE`
   *   when it is a finite value greater than zero; otherwise
   *   `DEFAULT_CACHE_LIMIT` (variable unset, empty, non-numeric, zero or
   *   negative).
   */
  function resolveCacheLimit(): number {
    const override = process.env.CODEGRAPH_RESOLVER_CACHE_SIZE;
    if (override === undefined || override === '') {
      return DEFAULT_CACHE_LIMIT;
    }

    const size = Number.parseInt(override, 10);
    const usable = Number.isFinite(size) && size > 0;
    return usable ? size : DEFAULT_CACHE_LIMIT;
  }
  42. // Re-export types
  43. export * from './types';
  // Lookup tables of built-in / external names. They are module-level so
  // they are allocated once and shared by every resolver instance; the Sets
  // give O(1) membership checks.

  /** Global identifiers skipped for JS/TS references. */
  const JS_BUILT_INS = new Set([
    // host / runtime globals
    'console', 'window', 'document', 'global', 'process',
    // standard constructors and namespaces
    'Promise', 'Array', 'Object', 'String', 'Number', 'Boolean',
    'Date', 'Math', 'JSON', 'RegExp', 'Error', 'Map', 'Set',
    // timers
    'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
    // fetch and CommonJS module scope
    'fetch', 'require', 'module', 'exports', '__dirname', '__filename',
  ]);

  /** Hooks that ship with React itself. */
  const REACT_HOOKS = new Set([
    'useState', 'useEffect', 'useContext', 'useReducer',
    'useCallback', 'useMemo', 'useRef',
    'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
  ]);

  /** Bare Python names treated as built-ins. */
  const PYTHON_BUILT_INS = new Set([
    'print', 'len', 'range',
    'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
    'open', 'input', 'type',
    'isinstance', 'hasattr', 'getattr', 'setattr',
    'super', 'self', 'cls',
    'None', 'True', 'False',
  ]);

  /** Python built-in type names; a dotted call on one of these receivers is skipped. */
  const PYTHON_BUILT_IN_TYPES = new Set([
    'list', 'dict', 'set', 'tuple',
    'str', 'int', 'float', 'bool',
    'bytes', 'bytearray', 'frozenset',
    'object', 'super',
  ]);

  /** Method names of Python's built-in containers, strings and file objects. */
  const PYTHON_BUILT_IN_METHODS = new Set([
    // list
    'append', 'extend', 'insert', 'remove', 'pop', 'clear', 'sort', 'reverse', 'copy',
    // dict
    'update', 'keys', 'values', 'items', 'get',
    // set
    'add', 'discard', 'union', 'intersection', 'difference',
    // str
    'split', 'join', 'strip', 'lstrip', 'rstrip', 'replace', 'lower', 'upper',
    'startswith', 'endswith', 'find', 'index', 'count', 'encode', 'decode',
    'format', 'isdigit', 'isalpha', 'isalnum',
    // file objects
    'read', 'write', 'readline', 'readlines', 'close', 'flush', 'seek',
  ]);

  /** Package qualifiers treated as Go standard library (e.g. the `fmt` in `fmt.Println`). */
  const GO_STDLIB_PACKAGES = new Set([
    'fmt', 'os', 'io', 'net', 'http', 'log', 'math', 'sort', 'sync',
    'time', 'path', 'bytes', 'strings', 'strconv', 'errors', 'context',
    'json', 'xml', 'csv', 'html', 'template', 'regexp', 'reflect',
    'runtime', 'testing', 'flag', 'bufio', 'crypto', 'encoding',
    'filepath', 'hash', 'mime', 'rand', 'signal', 'sql', 'syscall',
    'unicode', 'unsafe', 'atomic', 'binary', 'debug', 'exec', 'heap',
    'ring', 'scanner', 'tar', 'zip', 'gzip', 'zlib', 'tls', 'url',
    'user', 'pprof', 'trace', 'ast', 'build', 'parser', 'printer',
    'token', 'types', 'cgo', 'plugin', 'race', 'ioutil',
    // Import aliases commonly seen in Kubernetes code; filtered the same
    // way as the stdlib packages above.
    'utilruntime', 'utilwait', 'utilnet',
  ]);

  /** Go predeclared functions, constants and types. */
  const GO_BUILT_INS = new Set([
    // functions
    'make', 'new', 'len', 'cap', 'append', 'copy', 'delete', 'close',
    'panic', 'recover', 'print', 'println', 'complex', 'real', 'imag',
    // error type and constants
    'error', 'nil', 'true', 'false', 'iota',
    // numeric types
    'int', 'int8', 'int16', 'int32', 'int64',
    'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'uintptr',
    'float32', 'float64', 'complex64', 'complex128',
    // remaining basic types
    'string', 'bool', 'byte', 'rune', 'any',
  ]);

  /** Name prefixes that mark a Pascal/Delphi reference as a standard-library unit. */
  const PASCAL_UNIT_PREFIXES = [
    'System.', 'Winapi.', 'Vcl.', 'Fmx.', 'Data.', 'Datasnap.',
    'Soap.', 'Xml.', 'Web.', 'REST.', 'FireDAC.', 'IBX.',
    'IdHTTP', 'IdTCP', 'IdSSL',
  ];

  /** Pascal/Delphi units, routines, keywords and base types treated as built-in. */
  const PASCAL_BUILT_INS = new Set([
    // units
    'System', 'SysUtils', 'Classes', 'Types', 'Variants', 'StrUtils',
    'Math', 'DateUtils', 'IOUtils', 'Generics.Collections', 'Generics.Defaults',
    'Rtti', 'TypInfo', 'SyncObjs', 'RegularExpressions',
    'SysInit', 'Windows', 'Messages', 'Graphics', 'Controls', 'Forms',
    'Dialogs', 'StdCtrls', 'ExtCtrls', 'ComCtrls', 'Menus', 'ActnList',
    // routines
    'WriteLn', 'Write', 'ReadLn', 'Read', 'Inc', 'Dec', 'Ord', 'Chr',
    'Length', 'SetLength', 'High', 'Low', 'Assigned', 'FreeAndNil',
    'Format', 'IntToStr', 'StrToInt', 'FloatToStr', 'StrToFloat',
    'Trim', 'UpperCase', 'LowerCase', 'Pos', 'Copy', 'Delete', 'Insert',
    'Now', 'Date', 'Time', 'DateToStr', 'StrToDate',
    // control flow and well-known identifiers
    'Raise', 'Exit', 'Break', 'Continue', 'Abort',
    'True', 'False', 'nil', 'Self', 'Result',
    'Create', 'Destroy', 'Free',
    // base classes, exceptions and interfaces
    'TObject', 'TComponent', 'TPersistent', 'TInterfacedObject',
    'TList', 'TStringList', 'TStrings', 'TStream', 'TMemoryStream', 'TFileStream',
    'Exception', 'EAbort', 'EConvertError', 'EAccessViolation',
    'IInterface', 'IUnknown',
  ]);
  /**
   * Reference Resolver
   *
   * Orchestrates reference resolution using multiple strategies, tried in
   * this order by `resolveOne`: framework-specific resolvers, import-based
   * resolution, then name matching. Lookups go through a `ResolutionContext`
   * backed by the query builder, the filesystem and a set of bounded caches.
   */
  export class ReferenceResolver {
    private projectRoot: string;
    private queries: QueryBuilder;
    private context: ResolutionContext;
    private frameworks: FrameworkResolver[] = [];
    // All per-resolver caches are LRU-bounded. Previously these were
    // unbounded Maps that grew with every distinct lookup and OOM'd on
    // codebases with 20k+ files (see issue: unbounded cache growth).
    private nodeCache: LRUCache<string, Node[]>; // file path → nodes declared in that file
    private fileCache: LRUCache<string, string | null>; // file path → content (null = unreadable)
    private importMappingCache: LRUCache<string, ImportMapping[]>; // file path → import mappings
    private reExportCache: LRUCache<string, ReExport[]>; // file path → re-exports
    private nameCache: LRUCache<string, Node[]>; // name → nodes
    private lowerNameCache: LRUCache<string, Node[]>; // lower(name) → nodes
    private qualifiedNameCache: LRUCache<string, Node[]>; // qualified name → nodes
    private knownNames: Set<string> | null = null; // all known symbol names, for fast pre-filtering
    private knownFiles: Set<string> | null = null; // all indexed file paths
    private cachesWarmed = false;
    // tsconfig/jsconfig path-alias map. `undefined` = not yet computed,
    // `null` = computed and absent. Treated as immutable for the
    // resolver's lifetime; callers re-create the resolver if config changes.
    private projectAliases: AliasMap | null | undefined = undefined;

    /**
     * @param projectRoot Root directory that relative file paths are joined onto.
     * @param queries Query builder used for all node/edge/reference lookups.
     */
    constructor(projectRoot: string, queries: QueryBuilder) {
      this.projectRoot = projectRoot;
      this.queries = queries;
      const limit = resolveCacheLimit();
      // The content cache is heavier (full file text), so we give it a
      // smaller budget than the metadata caches.
      const contentLimit = Math.max(64, Math.floor(limit / 5));
      this.nodeCache = new LRUCache(limit);
      this.fileCache = new LRUCache(contentLimit);
      this.importMappingCache = new LRUCache(limit);
      this.reExportCache = new LRUCache(limit);
      this.nameCache = new LRUCache(limit);
      this.lowerNameCache = new LRUCache(limit);
      this.qualifiedNameCache = new LRUCache(limit);
      this.context = this.createContext();
    }

    /**
     * Initialize the resolver: detect frameworks, then drop every cache
     * (including anything filled while detecting).
     */
    initialize(): void {
      this.frameworks = detectFrameworks(this.context);
      this.clearCaches();
    }

    /**
     * Pre-build lightweight caches for resolution.
     * Node lookups are handled by indexed queries instead of loading all
     * nodes into memory (which caused OOM on large codebases). Only the set
     * of known file paths and the set of known symbol names are cached here.
     * A no-op when the caches are already warm.
     */
    warmCaches(): void {
      if (this.cachesWarmed) return;
      // Known file paths (lightweight string set)
      this.knownFiles = new Set(this.queries.getAllFilePaths());
      // All distinct symbol names (just strings, not full nodes)
      this.knownNames = new Set(this.queries.getAllNodeNames());
      this.cachesWarmed = true;
    }

    /**
     * Clear all internal caches and mark them as not warmed, so the next
     * `warmCaches()` call rebuilds the known-name / known-file sets.
     * The project alias map is intentionally left untouched.
     */
    clearCaches(): void {
      this.nodeCache.clear();
      this.fileCache.clear();
      this.importMappingCache.clear();
      this.reExportCache.clear();
      this.nameCache.clear();
      this.lowerNameCache.clear();
      this.qualifiedNameCache.clear();
      this.knownNames = null;
      this.knownFiles = null;
      this.cachesWarmed = false;
    }

    /**
     * Create the resolution context handed to every resolution strategy.
     * Each accessor consults the matching cache first and falls back to the
     * query builder or the filesystem.
     */
    private createContext(): ResolutionContext {
      return {
        getNodesInFile: (filePath: string) => {
          const cached = this.nodeCache.get(filePath);
          if (cached !== undefined) return cached;
          const nodes = this.queries.getNodesByFile(filePath);
          this.nodeCache.set(filePath, nodes);
          return nodes;
        },

        getNodesByName: (name: string) => {
          const cached = this.nameCache.get(name);
          if (cached !== undefined) return cached;
          const result = this.queries.getNodesByName(name);
          this.nameCache.set(name, result);
          return result;
        },

        getNodesByQualifiedName: (qualifiedName: string) => {
          const cached = this.qualifiedNameCache.get(qualifiedName);
          if (cached !== undefined) return cached;
          const result = this.queries.getNodesByQualifiedNameExact(qualifiedName);
          this.qualifiedNameCache.set(qualifiedName, result);
          return result;
        },

        // Not cached: goes straight to the query builder.
        getNodesByKind: (kind: Node['kind']) => {
          return this.queries.getNodesByKind(kind);
        },

        fileExists: (filePath: string) => {
          // Check the pre-built known-files set first (O(1)), both as given
          // and with backslashes normalized to forward slashes.
          if (this.knownFiles) {
            const normalized = filePath.replace(/\\/g, '/');
            if (this.knownFiles.has(filePath) || this.knownFiles.has(normalized)) {
              return true;
            }
          }
          // Fall back to the filesystem for files not yet indexed
          const fullPath = path.join(this.projectRoot, filePath);
          try {
            return fs.existsSync(fullPath);
          } catch (error) {
            logDebug('Error checking file existence', { filePath, error: String(error) });
            return false;
          }
        },

        readFile: (filePath: string) => {
          // `has` (not `get`) decides the hit because `null` is a valid
          // cached value meaning "a previous read failed".
          if (this.fileCache.has(filePath)) {
            return this.fileCache.get(filePath)!;
          }
          const fullPath = path.join(this.projectRoot, filePath);
          try {
            const content = fs.readFileSync(fullPath, 'utf-8');
            this.fileCache.set(filePath, content);
            return content;
          } catch (error) {
            logDebug('Failed to read file for resolution', { filePath, error: String(error) });
            this.fileCache.set(filePath, null);
            return null;
          }
        },

        getProjectRoot: () => this.projectRoot,

        getAllFiles: () => {
          return this.queries.getAllFilePaths();
        },

        listDirectories: (relativePath: string) => {
          const target = relativePath === '.' || relativePath === ''
            ? this.projectRoot
            : path.join(this.projectRoot, relativePath);
          try {
            return fs
              .readdirSync(target, { withFileTypes: true })
              .filter((entry) => entry.isDirectory())
              .map((entry) => entry.name);
          } catch (error) {
            logDebug('Failed to list directory for resolution', {
              relativePath,
              error: String(error),
            });
            return [];
          }
        },

        getNodesByLowerName: (lowerName: string) => {
          const cached = this.lowerNameCache.get(lowerName);
          if (cached !== undefined) return cached;
          const result = this.queries.getNodesByLowerName(lowerName);
          this.lowerNameCache.set(lowerName, result);
          return result;
        },

        getImportMappings: (filePath: string, language) => {
          // Keyed by file path only; the language is not part of the key.
          const cacheKey = filePath;
          const cached = this.importMappingCache.get(cacheKey);
          if (cached !== undefined) return cached;
          const content = this.context.readFile(filePath);
          if (!content) {
            // Unreadable or empty file: remember the empty result too.
            this.importMappingCache.set(cacheKey, []);
            return [];
          }
          const mappings = extractImportMappings(filePath, content, language);
          this.importMappingCache.set(cacheKey, mappings);
          return mappings;
        },

        getProjectAliases: () => {
          // Computed lazily, once; `null` (no aliases) is a cached answer.
          if (this.projectAliases === undefined) {
            this.projectAliases = loadProjectAliases(this.projectRoot);
          }
          return this.projectAliases;
        },

        getReExports: (filePath: string, language) => {
          const cached = this.reExportCache.get(filePath);
          if (cached !== undefined) return cached;
          const content = this.context.readFile(filePath);
          if (!content) {
            this.reExportCache.set(filePath, []);
            return [];
          }
          const reExports = extractReExports(content, language);
          this.reExportCache.set(filePath, reExports);
          return reExports;
        },
      };
    }

    /**
     * Resolve all unresolved references.
     *
     * @param unresolvedRefs References to resolve. `filePath` / `language`
     *   are taken from the reference when present, otherwise looked up from
     *   the source node.
     * @param onProgress Optional callback invoked with `(current, total)` at
     *   most once per whole percent, and exactly once with `(total, total)`
     *   at the end when there is at least one reference.
     * @returns Resolved and unresolved references plus counts per strategy.
     */
    resolveAll(
      unresolvedRefs: UnresolvedReference[],
      onProgress?: (current: number, total: number) => void
    ): ResolutionResult {
      // Build the known-name / known-file sets used for pre-filtering
      this.warmCaches();

      const resolved: ResolvedRef[] = [];
      const unresolved: UnresolvedRef[] = [];
      const byMethod: Record<string, number> = {};

      // Convert to our internal format, using denormalized fields when available
      const refs: UnresolvedRef[] = unresolvedRefs.map((ref) => ({
        fromNodeId: ref.fromNodeId,
        referenceName: ref.referenceName,
        referenceKind: ref.referenceKind,
        line: ref.line,
        column: ref.column,
        filePath: ref.filePath || this.getFilePathFromNodeId(ref.fromNodeId),
        language: ref.language || this.getLanguageFromNodeId(ref.fromNodeId),
      }));

      const total = refs.length;
      let lastReportedPercent = -1;
      let lastReportedCount = 0;

      for (let i = 0; i < total; i++) {
        const ref = refs[i]!; // Array index is guaranteed to be in bounds
        const result = this.resolveOne(ref);
        if (result) {
          resolved.push(result);
          byMethod[result.resolvedBy] = (byMethod[result.resolvedBy] || 0) + 1;
        } else {
          unresolved.push(ref);
        }

        // Report progress every 1% to avoid too many updates
        if (onProgress) {
          const currentPercent = Math.floor((i / total) * 100);
          if (currentPercent > lastReportedPercent) {
            lastReportedPercent = currentPercent;
            lastReportedCount = i + 1;
            onProgress(i + 1, total);
          }
        }
      }

      // Final progress report, skipped when the loop already reported
      // completion so listeners never see `(total, total)` twice.
      if (onProgress && total > 0 && lastReportedCount !== total) {
        onProgress(total, total);
      }

      return {
        resolved,
        unresolved,
        stats: {
          total,
          resolved: resolved.length,
          unresolved: unresolved.length,
          byMethod,
        },
      };
    }

    /**
     * Check if a reference name has any possible match in the codebase.
     * Uses the pre-built knownNames set to skip expensive resolution
     * for names that definitely don't exist as symbols. Returns `true`
     * (no filtering) when the set has not been built.
     */
    private hasAnyPossibleMatch(name: string): boolean {
      if (!this.knownNames) return true; // no pre-filter available

      // Direct name match
      if (this.knownNames.has(name)) return true;

      // For qualified names like "obj.method", split on the first dot and
      // check both parts
      const dotIdx = name.indexOf('.');
      if (dotIdx > 0) {
        const receiver = name.substring(0, dotIdx);
        const member = name.substring(dotIdx + 1);
        if (this.knownNames.has(receiver) || this.knownNames.has(member)) return true;
        // Also check capitalized receiver (instance-method resolution)
        const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1);
        if (this.knownNames.has(capitalized)) return true;
      }

      // Same for "Class::method", split on the first "::"
      const colonIdx = name.indexOf('::');
      if (colonIdx > 0) {
        const receiver = name.substring(0, colonIdx);
        const member = name.substring(colonIdx + 2);
        if (this.knownNames.has(receiver) || this.knownNames.has(member)) return true;
      }

      // For path-like references (e.g., "snippets/drawer-menu.liquid"), check the filename
      const slashIdx = name.lastIndexOf('/');
      if (slashIdx > 0) {
        const fileName = name.substring(slashIdx + 1);
        if (this.knownNames.has(fileName)) return true;
      }

      return false;
    }

    /**
     * Does `ref.referenceName` match an import declared in its containing
     * file? Used as a pre-filter escape so re-export chain resolution
     * still gets a chance when the name has no project-wide declaration.
     * Matches either the import's local name exactly or a member access on
     * it (`localName.`).
     */
    private matchesAnyImport(ref: UnresolvedRef): boolean {
      const imports = this.context.getImportMappings(ref.filePath, ref.language);
      return imports.some(
        (imp) =>
          imp.localName === ref.referenceName ||
          ref.referenceName.startsWith(imp.localName + '.')
      );
    }

    /**
     * Resolve a single reference.
     *
     * @returns The first result with confidence >= 0.9 from the framework
     *   or import strategies, otherwise the highest-confidence candidate
     *   across all strategies (earlier strategy wins ties), or `null` when
     *   the reference is built-in/external, pre-filtered, or unmatched.
     */
    resolveOne(ref: UnresolvedRef): ResolvedRef | null {
      // Skip built-in/external references
      if (this.isBuiltInOrExternal(ref)) {
        return null;
      }

      // Fast pre-filter: skip if no symbol with this name exists anywhere
      // AND the name doesn't match a local import. The import escape is
      // necessary because re-export rename chains (`import { login }
      // from './barrel'` where the barrel has `export { signIn as login }
      // from './auth'`) intentionally call a name that has no
      // declaration anywhere — only the renamed upstream symbol does.
      // A framework may also claim the name outright.
      if (
        !this.hasAnyPossibleMatch(ref.referenceName) &&
        !this.matchesAnyImport(ref) &&
        !this.frameworks.some((f) => f.claimsReference?.(ref.referenceName))
      ) {
        return null;
      }

      const candidates: ResolvedRef[] = [];

      // Strategy 1: Try framework-specific resolution
      for (const framework of this.frameworks) {
        const result = framework.resolve(ref, this.context);
        if (result) {
          if (result.confidence >= 0.9) return result; // High confidence, return immediately
          candidates.push(result);
        }
      }

      // Strategy 2: Try import-based resolution
      const importResult = resolveViaImport(ref, this.context);
      if (importResult) {
        if (importResult.confidence >= 0.9) return importResult;
        candidates.push(importResult);
      }

      // Strategy 3: Try name matching
      const nameResult = matchReference(ref, this.context);
      if (nameResult) {
        candidates.push(nameResult);
      }

      if (candidates.length === 0) return null;

      // Return highest confidence candidate
      return candidates.reduce((best, curr) =>
        curr.confidence > best.confidence ? curr : best
      );
    }

    /**
     * Create edges from resolved references.
     *
     * The edge kind is the reference kind, with two promotions that need
     * the resolved target: `extends` → `implements` and `calls` →
     * `instantiates` (see inline comments).
     */
    createEdges(resolved: ResolvedRef[]): Edge[] {
      return resolved.map((ref) => {
        let kind = ref.original.referenceKind;

        // Promote "extends" to "implements" when a class/struct targets an interface
        if (kind === 'extends') {
          const targetNode = this.queries.getNodeById(ref.targetNodeId);
          if (targetNode && (targetNode.kind === 'interface' || targetNode.kind === 'protocol')) {
            const sourceNode = this.queries.getNodeById(ref.original.fromNodeId);
            if (sourceNode && sourceNode.kind !== 'interface' && sourceNode.kind !== 'protocol') {
              kind = 'implements';
            }
          }
        }

        // Promote "calls" to "instantiates" when the resolved target is a
        // class/struct. Languages without a `new` keyword (Python, Ruby)
        // express instantiation as `Foo()` — extraction can't tell that
        // apart from a function call without symbol info, but resolution
        // can: if `Foo` resolves to a class, the call IS an instantiation.
        if (kind === 'calls') {
          const targetNode = this.queries.getNodeById(ref.targetNodeId);
          if (targetNode && (targetNode.kind === 'class' || targetNode.kind === 'struct')) {
            kind = 'instantiates';
          }
        }

        return {
          source: ref.original.fromNodeId,
          target: ref.targetNodeId,
          kind,
          line: ref.original.line,
          column: ref.original.column,
          metadata: {
            confidence: ref.confidence,
            resolvedBy: ref.resolvedBy,
          },
        };
      });
    }

    /**
     * Insert the edges for `resolved` and remove those references from the
     * unresolved-refs table so metrics stay accurate. Shared by the
     * one-shot and batched persist paths.
     */
    private persistResolved(resolved: ResolvedRef[]): void {
      const edges = this.createEdges(resolved);
      if (edges.length > 0) {
        this.queries.insertEdges(edges);
      }
      if (resolved.length > 0) {
        this.queries.deleteSpecificResolvedReferences(
          resolved.map((r) => ({
            fromNodeId: r.original.fromNodeId,
            referenceName: r.original.referenceName,
            referenceKind: r.original.referenceKind,
          }))
        );
      }
    }

    /**
     * Resolve the given references and persist the resulting edges.
     * References that stay unresolved are left in the database.
     */
    resolveAndPersist(
      unresolvedRefs: UnresolvedReference[],
      onProgress?: (current: number, total: number) => void
    ): ResolutionResult {
      const result = this.resolveAll(unresolvedRefs, onProgress);
      this.persistResolved(result.resolved);
      return result;
    }

    /**
     * Resolve and persist in batches to keep memory bounded.
     * Processes unresolved references in chunks, persisting edges and
     * deleting both resolved and unresolvable refs after each batch so no
     * large arrays accumulate. Afterwards runs best-effort callback-edge
     * synthesis and records its return value under
     * `stats.byMethod['callback-synthesis']`.
     *
     * @param onProgress Optional callback invoked once per batch with
     *   `(processed, total)`, where `total` is the row count at start.
     * @param batchSize Maximum number of references read per batch.
     * @returns Aggregate stats only; `resolved` / `unresolved` are always
     *   empty arrays because per-batch results are not retained.
     */
    async resolveAndPersistBatched(
      onProgress?: (current: number, total: number) => void,
      batchSize: number = 5000
    ): Promise<ResolutionResult> {
      this.warmCaches();

      const total = this.queries.getUnresolvedReferencesCount();
      let processed = 0;
      let remaining = total;

      const aggregateStats = {
        total: 0,
        resolved: 0,
        unresolved: 0,
        byMethod: {} as Record<string, number>,
      };

      // Process in batches. We always read from offset 0 because processed
      // refs are deleted after each batch, shifting the remaining rows forward.
      while (true) {
        const batch = this.queries.getUnresolvedReferencesBatch(0, batchSize);
        if (batch.length === 0) break;

        const result = this.resolveAll(batch);

        // Persist edges immediately and remove the resolved refs so they
        // don't appear in the next batch
        this.persistResolved(result.resolved);

        // Delete unresolvable refs from this batch to avoid re-processing them
        if (result.unresolved.length > 0) {
          this.queries.deleteSpecificResolvedReferences(
            result.unresolved.map((r) => ({
              fromNodeId: r.fromNodeId,
              referenceName: r.referenceName,
              referenceKind: r.referenceKind,
            }))
          );
        }

        // Aggregate stats
        aggregateStats.total += result.stats.total;
        aggregateStats.resolved += result.stats.resolved;
        aggregateStats.unresolved += result.stats.unresolved;
        for (const [method, count] of Object.entries(result.stats.byMethod)) {
          aggregateStats.byMethod[method] = (aggregateStats.byMethod[method] || 0) + count;
        }

        processed += batch.length;
        onProgress?.(processed, total);

        // Yield so progress UI can render between batches
        await new Promise(resolve => setImmediate(resolve));

        // Infinite-loop guard. Every ref in the batch was deleted above, so
        // the table must have shrunk; if it did not, the next read at
        // offset 0 would return the same rows forever. A batch that merely
        // resolved nothing is NOT a reason to stop — its rows are gone and
        // later rows still need processing.
        const remainingAfter = this.queries.getUnresolvedReferencesCount();
        if (remainingAfter >= remaining) {
          break;
        }
        remaining = remainingAfter;
      }

      // Dynamic-edge synthesis: now that all base `calls` edges are persisted,
      // synthesize observer/callback dispatch edges (dispatcher → registered
      // callbacks) that static parsing leaves out. Best-effort — never fail the
      // index on it. See docs/design/callback-edge-synthesis.md.
      try {
        aggregateStats.byMethod['callback-synthesis'] = synthesizeCallbackEdges(this.queries, this.context);
      } catch (error) {
        // Synthesis is additive and optional: record the failure for
        // debugging but keep the resolution result.
        logDebug('Callback edge synthesis failed', { error: String(error) });
      }

      return {
        resolved: [],
        unresolved: [],
        stats: aggregateStats,
      };
    }

    /**
     * Get the names of the detected frameworks.
     */
    getDetectedFrameworks(): string[] {
      return this.frameworks.map((f) => f.name);
    }

    /**
     * Check if a reference is to a built-in or external symbol, based on
     * the reference's language and the module-level built-in tables.
     */
    private isBuiltInOrExternal(ref: UnresolvedRef): boolean {
      const name = ref.referenceName;
      const isJsTs = ref.language === 'typescript' || ref.language === 'javascript'
        || ref.language === 'tsx' || ref.language === 'jsx';

      // JavaScript/TypeScript built-ins
      if (isJsTs && JS_BUILT_INS.has(name)) {
        return true;
      }

      // Common JS/TS library calls (console.log, Math.floor, JSON.parse)
      if (isJsTs && (name.startsWith('console.') || name.startsWith('Math.') || name.startsWith('JSON.'))) {
        return true;
      }

      // React hooks from React itself
      if (isJsTs && REACT_HOOKS.has(name)) {
        return true;
      }

      // Python built-ins (bare calls only — dotted calls like console.print are method calls)
      if (ref.language === 'python' && PYTHON_BUILT_INS.has(name)) {
        return true;
      }

      // Python built-in method calls (e.g., list.extend, dict.update)
      if (ref.language === 'python') {
        const dotIdx = name.indexOf('.');
        if (dotIdx > 0) {
          const receiver = name.substring(0, dotIdx);
          const method = name.substring(dotIdx + 1);
          // Filter calls on built-in types (list.append, dict.update, etc.)
          if (PYTHON_BUILT_IN_TYPES.has(receiver)) {
            return true;
          }
          // Filter built-in methods on non-class receivers
          // (e.g., items.append where items is a local list variable)
          // But allow if the capitalized receiver matches a known codebase class
          if (PYTHON_BUILT_IN_METHODS.has(method)) {
            const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1);
            if (!this.knownNames?.has(capitalized)) {
              return true;
            }
          }
        }
        // A bare name colliding with a builtin method (index, get, update, count…)
        // is only a builtin when NOTHING in the codebase declares it. A declared
        // symbol with that exact name — e.g. a Flask/FastAPI view `def index()` or
        // `def get()` — is a real reference target. Mirrors the knownNames guard on
        // the dotted branch above; without it, every handler named after a builtin
        // method silently loses its route→handler edge.
        if (PYTHON_BUILT_IN_METHODS.has(name) && !this.knownNames?.has(name)) {
          return true;
        }
      }

      // Go standard library packages — refs like "fmt.Println", "http.ListenAndServe", etc.
      if (ref.language === 'go') {
        const dotIdx = name.indexOf('.');
        if (dotIdx > 0) {
          const pkg = name.substring(0, dotIdx);
          if (GO_STDLIB_PACKAGES.has(pkg)) {
            return true;
          }
        }
        if (GO_BUILT_INS.has(name)) {
          return true;
        }
      }

      // Pascal/Delphi built-ins and standard library units
      if (ref.language === 'pascal') {
        if (PASCAL_UNIT_PREFIXES.some((p) => name.startsWith(p))) {
          return true;
        }
        if (PASCAL_BUILT_INS.has(name)) {
          return true;
        }
      }

      return false;
    }

    /**
     * Get the file path of the node with the given ID, or `''` when the
     * node is missing or has no file path.
     */
    private getFilePathFromNodeId(nodeId: string): string {
      const node = this.queries.getNodeById(nodeId);
      return node?.filePath || '';
    }

    /**
     * Get the language of the node with the given ID, or `'unknown'` when
     * the node is missing or has no language.
     */
    private getLanguageFromNodeId(nodeId: string): UnresolvedRef['language'] {
      const node = this.queries.getNodeById(nodeId);
      return node?.language || 'unknown';
    }
  }
  /**
   * Build a ready-to-use reference resolver.
   *
   * Constructs a `ReferenceResolver` for the given project and runs its
   * `initialize()` step before handing it back.
   *
   * @param projectRoot Root directory of the project being resolved.
   * @param queries Query builder the resolver reads from and writes to.
   * @returns The initialized resolver.
   */
  export function createResolver(projectRoot: string, queries: QueryBuilder): ReferenceResolver {
    const instance = new ReferenceResolver(projectRoot, queries);
    instance.initialize();
    return instance;
  }