| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876 |
- /**
- * Reference Resolution Orchestrator
- *
- * Coordinates all reference resolution strategies.
- */
- import * as fs from 'fs';
- import * as path from 'path';
- import { Node, UnresolvedReference, Edge } from '../types';
- import { QueryBuilder } from '../db/queries';
- import {
- UnresolvedRef,
- ResolvedRef,
- ResolutionResult,
- ResolutionContext,
- FrameworkResolver,
- ImportMapping,
- } from './types';
- import { matchReference } from './name-matcher';
- import { resolveViaImport, extractImportMappings, extractReExports } from './import-resolver';
- import { detectFrameworks } from './frameworks';
- import { synthesizeCallbackEdges } from './callback-synthesizer';
- import { loadProjectAliases, type AliasMap } from './path-aliases';
- import { logDebug } from '../errors';
- import type { ReExport } from './types';
- import { LRUCache } from './lru-cache';
- /**
- * Cache size limits. Each per-resolver cache is bounded so memory
- * stays flat on large codebases (20k+ files). Sizes were chosen to
- * cover the working set for typical resolution batches without
- * exceeding a few hundred MB worst-case. Override via the env var
- * `CODEGRAPH_RESOLVER_CACHE_SIZE` (single integer applied to all
- * caches) when tuning for very large or very small projects.
- */
- const DEFAULT_CACHE_LIMIT = 5_000;
- function resolveCacheLimit(): number {
- const raw = process.env.CODEGRAPH_RESOLVER_CACHE_SIZE;
- if (!raw) return DEFAULT_CACHE_LIMIT;
- const parsed = Number.parseInt(raw, 10);
- if (Number.isFinite(parsed) && parsed > 0) return parsed;
- return DEFAULT_CACHE_LIMIT;
- }
- // Re-export types
- export * from './types';
- // Pre-built Sets for O(1) built-in lookups (allocated once, shared across all instances)
- const JS_BUILT_INS = new Set([
- 'console', 'window', 'document', 'global', 'process',
- 'Promise', 'Array', 'Object', 'String', 'Number', 'Boolean',
- 'Date', 'Math', 'JSON', 'RegExp', 'Error', 'Map', 'Set',
- 'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
- 'fetch', 'require', 'module', 'exports', '__dirname', '__filename',
- ]);
- const REACT_HOOKS = new Set([
- 'useState', 'useEffect', 'useContext', 'useReducer', 'useCallback',
- 'useMemo', 'useRef', 'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
- ]);
- const PYTHON_BUILT_INS = new Set([
- 'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
- 'open', 'input', 'type', 'isinstance', 'hasattr', 'getattr', 'setattr',
- 'super', 'self', 'cls', 'None', 'True', 'False',
- ]);
- const PYTHON_BUILT_IN_TYPES = new Set([
- 'list', 'dict', 'set', 'tuple', 'str', 'int', 'float', 'bool',
- 'bytes', 'bytearray', 'frozenset', 'object', 'super',
- ]);
- const PYTHON_BUILT_IN_METHODS = new Set([
- 'append', 'extend', 'insert', 'remove', 'pop', 'clear', 'sort', 'reverse', 'copy',
- 'update', 'keys', 'values', 'items', 'get',
- 'add', 'discard', 'union', 'intersection', 'difference',
- 'split', 'join', 'strip', 'lstrip', 'rstrip', 'replace', 'lower', 'upper',
- 'startswith', 'endswith', 'find', 'index', 'count', 'encode', 'decode',
- 'format', 'isdigit', 'isalpha', 'isalnum',
- 'read', 'write', 'readline', 'readlines', 'close', 'flush', 'seek',
- ]);
- const GO_STDLIB_PACKAGES = new Set([
- 'fmt', 'os', 'io', 'net', 'http', 'log', 'math', 'sort', 'sync',
- 'time', 'path', 'bytes', 'strings', 'strconv', 'errors', 'context',
- 'json', 'xml', 'csv', 'html', 'template', 'regexp', 'reflect',
- 'runtime', 'testing', 'flag', 'bufio', 'crypto', 'encoding',
- 'filepath', 'hash', 'mime', 'rand', 'signal', 'sql', 'syscall',
- 'unicode', 'unsafe', 'atomic', 'binary', 'debug', 'exec', 'heap',
- 'ring', 'scanner', 'tar', 'zip', 'gzip', 'zlib', 'tls', 'url',
- 'user', 'pprof', 'trace', 'ast', 'build', 'parser', 'printer',
- 'token', 'types', 'cgo', 'plugin', 'race', 'ioutil',
- // Kubernetes-common stdlib aliases
- 'utilruntime', 'utilwait', 'utilnet',
- ]);
- const GO_BUILT_INS = new Set([
- 'make', 'new', 'len', 'cap', 'append', 'copy', 'delete', 'close',
- 'panic', 'recover', 'print', 'println', 'complex', 'real', 'imag',
- 'error', 'nil', 'true', 'false', 'iota',
- 'int', 'int8', 'int16', 'int32', 'int64',
- 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'uintptr',
- 'float32', 'float64', 'complex64', 'complex128',
- 'string', 'bool', 'byte', 'rune', 'any',
- ]);
- const PASCAL_UNIT_PREFIXES = [
- 'System.', 'Winapi.', 'Vcl.', 'Fmx.', 'Data.', 'Datasnap.',
- 'Soap.', 'Xml.', 'Web.', 'REST.', 'FireDAC.', 'IBX.',
- 'IdHTTP', 'IdTCP', 'IdSSL',
- ];
- const PASCAL_BUILT_INS = new Set([
- 'System', 'SysUtils', 'Classes', 'Types', 'Variants', 'StrUtils',
- 'Math', 'DateUtils', 'IOUtils', 'Generics.Collections', 'Generics.Defaults',
- 'Rtti', 'TypInfo', 'SyncObjs', 'RegularExpressions',
- 'SysInit', 'Windows', 'Messages', 'Graphics', 'Controls', 'Forms',
- 'Dialogs', 'StdCtrls', 'ExtCtrls', 'ComCtrls', 'Menus', 'ActnList',
- 'WriteLn', 'Write', 'ReadLn', 'Read', 'Inc', 'Dec', 'Ord', 'Chr',
- 'Length', 'SetLength', 'High', 'Low', 'Assigned', 'FreeAndNil',
- 'Format', 'IntToStr', 'StrToInt', 'FloatToStr', 'StrToFloat',
- 'Trim', 'UpperCase', 'LowerCase', 'Pos', 'Copy', 'Delete', 'Insert',
- 'Now', 'Date', 'Time', 'DateToStr', 'StrToDate',
- 'Raise', 'Exit', 'Break', 'Continue', 'Abort',
- 'True', 'False', 'nil', 'Self', 'Result',
- 'Create', 'Destroy', 'Free',
- 'TObject', 'TComponent', 'TPersistent', 'TInterfacedObject',
- 'TList', 'TStringList', 'TStrings', 'TStream', 'TMemoryStream', 'TFileStream',
- 'Exception', 'EAbort', 'EConvertError', 'EAccessViolation',
- 'IInterface', 'IUnknown',
- ]);
- /**
- * Reference Resolver
- *
- * Orchestrates reference resolution using multiple strategies.
- */
- export class ReferenceResolver {
- private projectRoot: string;
- private queries: QueryBuilder;
- private context: ResolutionContext;
- private frameworks: FrameworkResolver[] = [];
- // All per-resolver caches are LRU-bounded. Previously these were
- // unbounded Maps that grew with every distinct lookup and OOM'd on
- // codebases with 20k+ files (see issue: unbounded cache growth).
- private nodeCache: LRUCache<string, Node[]>; // per-file node cache
- private fileCache: LRUCache<string, string | null>; // per-file content cache
- private importMappingCache: LRUCache<string, ImportMapping[]>;
- private reExportCache: LRUCache<string, ReExport[]>;
- private nameCache: LRUCache<string, Node[]>; // name → nodes cache
- private lowerNameCache: LRUCache<string, Node[]>; // lower(name) → nodes cache
- private qualifiedNameCache: LRUCache<string, Node[]>; // qualified_name → nodes cache
- private knownNames: Set<string> | null = null; // all known symbol names for fast pre-filtering
- private knownFiles: Set<string> | null = null;
- private cachesWarmed = false;
- // tsconfig/jsconfig path-alias map. `undefined` = not yet computed,
- // `null` = computed and absent. Treated as immutable for the
- // resolver's lifetime; callers re-create the resolver if config changes.
- private projectAliases: AliasMap | null | undefined = undefined;
- constructor(projectRoot: string, queries: QueryBuilder) {
- this.projectRoot = projectRoot;
- this.queries = queries;
- const limit = resolveCacheLimit();
- // The content cache is heavier (full file text), so we give it a
- // smaller budget than the metadata caches.
- const contentLimit = Math.max(64, Math.floor(limit / 5));
- this.nodeCache = new LRUCache(limit);
- this.fileCache = new LRUCache(contentLimit);
- this.importMappingCache = new LRUCache(limit);
- this.reExportCache = new LRUCache(limit);
- this.nameCache = new LRUCache(limit);
- this.lowerNameCache = new LRUCache(limit);
- this.qualifiedNameCache = new LRUCache(limit);
- this.context = this.createContext();
- }
- /**
- * Initialize the resolver (detect frameworks, etc.)
- */
- initialize(): void {
- this.frameworks = detectFrameworks(this.context);
- this.clearCaches();
- }
- /**
- * Run each framework resolver's cross-file finalization pass and persist
- * the returned node updates. Idempotent — safe to call after every indexAll
- * and every incremental sync. Returns the number of nodes updated.
- *
- * Caches are cleared before/after so the post-extract pass sees fresh DB
- * state and downstream queries see the updated names.
- */
- runPostExtract(): number {
- let updated = 0;
- this.clearCaches();
- for (const fw of this.frameworks) {
- if (!fw.postExtract) continue;
- try {
- const nodes = fw.postExtract(this.context);
- for (const node of nodes) {
- this.queries.updateNode(node);
- updated++;
- }
- } catch (err) {
- logDebug(`Framework '${fw.name}' postExtract failed`, {
- error: err instanceof Error ? err.message : String(err),
- });
- }
- }
- if (updated > 0) this.clearCaches();
- return updated;
- }
- /**
- * Pre-build lightweight caches for resolution.
- * Node lookups are now handled by indexed SQLite queries instead of
- * loading all nodes into memory (which caused OOM on large codebases).
- * We cache the set of known symbol names for fast pre-filtering.
- */
- warmCaches(): void {
- if (this.cachesWarmed) return;
- // Only cache the set of known file paths (lightweight string set)
- this.knownFiles = new Set(this.queries.getAllFilePaths());
- // Cache all distinct symbol names for fast pre-filtering (just strings, not full nodes)
- this.knownNames = new Set(this.queries.getAllNodeNames());
- this.cachesWarmed = true;
- }
- /**
- * Clear internal caches
- */
- clearCaches(): void {
- this.nodeCache.clear();
- this.fileCache.clear();
- this.importMappingCache.clear();
- this.reExportCache.clear();
- this.nameCache.clear();
- this.lowerNameCache.clear();
- this.qualifiedNameCache.clear();
- this.knownNames = null;
- this.knownFiles = null;
- this.cachesWarmed = false;
- }
- /**
- * Create the resolution context
- */
- private createContext(): ResolutionContext {
- return {
- getNodesInFile: (filePath: string) => {
- if (!this.nodeCache.has(filePath)) {
- this.nodeCache.set(filePath, this.queries.getNodesByFile(filePath));
- }
- return this.nodeCache.get(filePath)!;
- },
- getNodesByName: (name: string) => {
- const cached = this.nameCache.get(name);
- if (cached !== undefined) return cached;
- const result = this.queries.getNodesByName(name);
- this.nameCache.set(name, result);
- return result;
- },
- getNodesByQualifiedName: (qualifiedName: string) => {
- const cached = this.qualifiedNameCache.get(qualifiedName);
- if (cached !== undefined) return cached;
- const result = this.queries.getNodesByQualifiedNameExact(qualifiedName);
- this.qualifiedNameCache.set(qualifiedName, result);
- return result;
- },
- getNodesByKind: (kind: Node['kind']) => {
- return this.queries.getNodesByKind(kind);
- },
- fileExists: (filePath: string) => {
- // Check pre-built known files set first (O(1))
- if (this.knownFiles) {
- const normalized = filePath.replace(/\\/g, '/');
- if (this.knownFiles.has(filePath) || this.knownFiles.has(normalized)) {
- return true;
- }
- }
- // Fall back to filesystem for files not yet indexed
- const fullPath = path.join(this.projectRoot, filePath);
- try {
- return fs.existsSync(fullPath);
- } catch (error) {
- logDebug('Error checking file existence', { filePath, error: String(error) });
- return false;
- }
- },
- readFile: (filePath: string) => {
- if (this.fileCache.has(filePath)) {
- return this.fileCache.get(filePath)!;
- }
- const fullPath = path.join(this.projectRoot, filePath);
- try {
- const content = fs.readFileSync(fullPath, 'utf-8');
- this.fileCache.set(filePath, content);
- return content;
- } catch (error) {
- logDebug('Failed to read file for resolution', { filePath, error: String(error) });
- this.fileCache.set(filePath, null);
- return null;
- }
- },
- getProjectRoot: () => this.projectRoot,
- getAllFiles: () => {
- return this.queries.getAllFilePaths();
- },
- listDirectories: (relativePath: string) => {
- const target = relativePath === '.' || relativePath === ''
- ? this.projectRoot
- : path.join(this.projectRoot, relativePath);
- try {
- return fs
- .readdirSync(target, { withFileTypes: true })
- .filter((entry) => entry.isDirectory())
- .map((entry) => entry.name);
- } catch (error) {
- logDebug('Failed to list directory for resolution', {
- relativePath,
- error: String(error),
- });
- return [];
- }
- },
- getNodesByLowerName: (lowerName: string) => {
- const cached = this.lowerNameCache.get(lowerName);
- if (cached !== undefined) return cached;
- const result = this.queries.getNodesByLowerName(lowerName);
- this.lowerNameCache.set(lowerName, result);
- return result;
- },
- getImportMappings: (filePath: string, language) => {
- const cacheKey = filePath;
- const cached = this.importMappingCache.get(cacheKey);
- if (cached) return cached;
- const content = this.context.readFile(filePath);
- if (!content) {
- this.importMappingCache.set(cacheKey, []);
- return [];
- }
- const mappings = extractImportMappings(filePath, content, language);
- this.importMappingCache.set(cacheKey, mappings);
- return mappings;
- },
- getProjectAliases: () => {
- if (this.projectAliases === undefined) {
- this.projectAliases = loadProjectAliases(this.projectRoot);
- }
- return this.projectAliases;
- },
- getReExports: (filePath: string, language) => {
- const cached = this.reExportCache.get(filePath);
- if (cached) return cached;
- const content = this.context.readFile(filePath);
- if (!content) {
- this.reExportCache.set(filePath, []);
- return [];
- }
- const reExports = extractReExports(content, language);
- this.reExportCache.set(filePath, reExports);
- return reExports;
- },
- };
- }
- /**
- * Resolve all unresolved references
- */
- resolveAll(
- unresolvedRefs: UnresolvedReference[],
- onProgress?: (current: number, total: number) => void
- ): ResolutionResult {
- // Pre-load all nodes into memory for fast lookups
- this.warmCaches();
- const resolved: ResolvedRef[] = [];
- const unresolved: UnresolvedRef[] = [];
- const byMethod: Record<string, number> = {};
- // Convert to our internal format, using denormalized fields when available
- const refs: UnresolvedRef[] = unresolvedRefs.map((ref) => ({
- fromNodeId: ref.fromNodeId,
- referenceName: ref.referenceName,
- referenceKind: ref.referenceKind,
- line: ref.line,
- column: ref.column,
- filePath: ref.filePath || this.getFilePathFromNodeId(ref.fromNodeId),
- language: ref.language || this.getLanguageFromNodeId(ref.fromNodeId),
- }));
- const total = refs.length;
- let lastReportedPercent = -1;
- for (let i = 0; i < refs.length; i++) {
- const ref = refs[i]!; // Array index is guaranteed to be in bounds
- const result = this.resolveOne(ref);
- if (result) {
- resolved.push(result);
- byMethod[result.resolvedBy] = (byMethod[result.resolvedBy] || 0) + 1;
- } else {
- unresolved.push(ref);
- }
- // Report progress every 1% to avoid too many updates
- if (onProgress) {
- const currentPercent = Math.floor((i / total) * 100);
- if (currentPercent > lastReportedPercent) {
- lastReportedPercent = currentPercent;
- onProgress(i + 1, total);
- }
- }
- }
- // Final progress report
- if (onProgress && total > 0) {
- onProgress(total, total);
- }
- return {
- resolved,
- unresolved,
- stats: {
- total: refs.length,
- resolved: resolved.length,
- unresolved: unresolved.length,
- byMethod,
- },
- };
- }
- /**
- * Check if a reference name has any possible match in the codebase.
- * Uses the pre-built knownNames set to skip expensive resolution
- * for names that definitely don't exist as symbols.
- */
- private hasAnyPossibleMatch(name: string): boolean {
- if (!this.knownNames) return true; // no pre-filter available
- // Direct name match
- if (this.knownNames.has(name)) return true;
- // For qualified names like "obj.method" or "Class::method", check the parts
- const dotIdx = name.indexOf('.');
- if (dotIdx > 0) {
- const receiver = name.substring(0, dotIdx);
- const member = name.substring(dotIdx + 1);
- if (this.knownNames.has(receiver) || this.knownNames.has(member)) return true;
- // Also check capitalized receiver (instance-method resolution)
- const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1);
- if (this.knownNames.has(capitalized)) return true;
- }
- const colonIdx = name.indexOf('::');
- if (colonIdx > 0) {
- const receiver = name.substring(0, colonIdx);
- const member = name.substring(colonIdx + 2);
- if (this.knownNames.has(receiver) || this.knownNames.has(member)) return true;
- }
- // For path-like references (e.g., "snippets/drawer-menu.liquid"), check the filename
- const slashIdx = name.lastIndexOf('/');
- if (slashIdx > 0) {
- const fileName = name.substring(slashIdx + 1);
- if (this.knownNames.has(fileName)) return true;
- }
- return false;
- }
- /**
- * Does `ref.referenceName` match an import declared in its containing
- * file? Used as a pre-filter escape so re-export chain resolution
- * still gets a chance when the name has no project-wide declaration.
- */
- private matchesAnyImport(ref: UnresolvedRef): boolean {
- const imports = this.context.getImportMappings(ref.filePath, ref.language);
- if (imports.length === 0) return false;
- for (const imp of imports) {
- if (
- imp.localName === ref.referenceName ||
- ref.referenceName.startsWith(imp.localName + '.')
- ) {
- return true;
- }
- }
- return false;
- }
- /**
- * Resolve a single reference
- */
- resolveOne(ref: UnresolvedRef): ResolvedRef | null {
- // Skip built-in/external references
- if (this.isBuiltInOrExternal(ref)) {
- return null;
- }
- // Fast pre-filter: skip if no symbol with this name exists anywhere
- // AND the name doesn't match a local import. The import escape is
- // necessary because re-export rename chains (`import { login }
- // from './barrel'` where the barrel has `export { signIn as login }
- // from './auth'`) intentionally call a name that has no
- // declaration anywhere — only the renamed upstream symbol does.
- if (
- !this.hasAnyPossibleMatch(ref.referenceName) &&
- !this.matchesAnyImport(ref) &&
- !this.frameworks.some((f) => f.claimsReference?.(ref.referenceName))
- ) {
- return null;
- }
- const candidates: ResolvedRef[] = [];
- // Strategy 1: Try framework-specific resolution
- for (const framework of this.frameworks) {
- const result = framework.resolve(ref, this.context);
- if (result) {
- if (result.confidence >= 0.9) return result; // High confidence, return immediately
- candidates.push(result);
- }
- }
- // Strategy 2: Try import-based resolution
- const importResult = resolveViaImport(ref, this.context);
- if (importResult) {
- if (importResult.confidence >= 0.9) return importResult;
- candidates.push(importResult);
- }
- // Strategy 3: Try name matching
- const nameResult = matchReference(ref, this.context);
- if (nameResult) {
- candidates.push(nameResult);
- }
- if (candidates.length === 0) return null;
- // Return highest confidence candidate
- return candidates.reduce((best, curr) =>
- curr.confidence > best.confidence ? curr : best
- );
- }
- /**
- * Create edges from resolved references
- */
- createEdges(resolved: ResolvedRef[]): Edge[] {
- return resolved.map((ref) => {
- let kind = ref.original.referenceKind;
- // Promote "extends" to "implements" when a class/struct targets an interface
- if (kind === 'extends') {
- const targetNode = this.queries.getNodeById(ref.targetNodeId);
- if (targetNode && (targetNode.kind === 'interface' || targetNode.kind === 'protocol')) {
- const sourceNode = this.queries.getNodeById(ref.original.fromNodeId);
- if (sourceNode && sourceNode.kind !== 'interface' && sourceNode.kind !== 'protocol') {
- kind = 'implements';
- }
- }
- }
- // Promote "calls" to "instantiates" when the resolved target is a
- // class/struct. Languages without a `new` keyword (Python, Ruby)
- // express instantiation as `Foo()` — extraction can't tell that
- // apart from a function call without symbol info, but resolution
- // can: if `Foo` resolves to a class, the call IS an instantiation.
- if (kind === 'calls') {
- const targetNode = this.queries.getNodeById(ref.targetNodeId);
- if (targetNode && (targetNode.kind === 'class' || targetNode.kind === 'struct')) {
- kind = 'instantiates';
- }
- }
- return {
- source: ref.original.fromNodeId,
- target: ref.targetNodeId,
- kind,
- line: ref.original.line,
- column: ref.original.column,
- metadata: {
- confidence: ref.confidence,
- resolvedBy: ref.resolvedBy,
- },
- };
- });
- }
- /**
- * Defense-in-depth: drop edges whose source or target is no longer in
- * the nodes table. PR #62 (issue #42) applied this filter at the
- * extraction-layer `insertEdges` site; #455 reports the same
- * `FOREIGN KEY constraint failed` reappearing here at the
- * resolution-layer site during watch sync, where a resolver lookup that
- * crosses a framework-specific cache can hand us a target whose node
- * was removed by a concurrent file rewrite. One batched, cache-aware
- * `getNodesByIds` query is enough to skip those edges quietly instead
- * of aborting the whole sync.
- */
- private filterEdgesByExistingNodes(edges: Edge[]): Edge[] {
- if (edges.length === 0) return edges;
- const allIds = new Set<string>();
- for (const e of edges) {
- allIds.add(e.source);
- allIds.add(e.target);
- }
- const existing = this.queries.getNodesByIds([...allIds]);
- return edges.filter((e) => existing.has(e.source) && existing.has(e.target));
- }
- /**
- * Resolve and persist edges to database
- */
- resolveAndPersist(
- unresolvedRefs: UnresolvedReference[],
- onProgress?: (current: number, total: number) => void
- ): ResolutionResult {
- const result = this.resolveAll(unresolvedRefs, onProgress);
- // Create edges from resolved references
- const edges = this.createEdges(result.resolved);
- // Insert edges into database
- const validEdges = this.filterEdgesByExistingNodes(edges);
- if (validEdges.length > 0) {
- this.queries.insertEdges(validEdges);
- }
- // Clean up resolved refs from unresolved_refs table so metrics are accurate
- if (result.resolved.length > 0) {
- this.queries.deleteSpecificResolvedReferences(
- result.resolved.map((r) => ({
- fromNodeId: r.original.fromNodeId,
- referenceName: r.original.referenceName,
- referenceKind: r.original.referenceKind,
- }))
- );
- }
- return result;
- }
- /**
- * Resolve and persist in batches to keep memory bounded.
- * Processes unresolved references in chunks, persisting edges and cleaning
- * up resolved refs after each batch to avoid accumulating large arrays.
- */
- async resolveAndPersistBatched(
- onProgress?: (current: number, total: number) => void,
- batchSize: number = 5000
- ): Promise<ResolutionResult> {
- this.warmCaches();
- const total = this.queries.getUnresolvedReferencesCount();
- let processed = 0;
- const aggregateStats = {
- total: 0,
- resolved: 0,
- unresolved: 0,
- byMethod: {} as Record<string, number>,
- };
- // Process in batches. We always read from offset 0 because resolved refs
- // are deleted after each batch, shifting the remaining rows forward.
- while (true) {
- const batch = this.queries.getUnresolvedReferencesBatch(0, batchSize);
- if (batch.length === 0) break;
- const result = this.resolveAll(batch);
- // Persist edges immediately
- const edges = this.createEdges(result.resolved);
- const validEdges = this.filterEdgesByExistingNodes(edges);
- if (validEdges.length > 0) {
- this.queries.insertEdges(validEdges);
- }
- // Clean up resolved refs so they don't appear in the next batch
- if (result.resolved.length > 0) {
- this.queries.deleteSpecificResolvedReferences(
- result.resolved.map((r) => ({
- fromNodeId: r.original.fromNodeId,
- referenceName: r.original.referenceName,
- referenceKind: r.original.referenceKind,
- }))
- );
- }
- // Delete unresolvable refs from this batch to avoid re-processing them
- if (result.unresolved.length > 0) {
- this.queries.deleteSpecificResolvedReferences(
- result.unresolved.map((r) => ({
- fromNodeId: r.fromNodeId,
- referenceName: r.referenceName,
- referenceKind: r.referenceKind,
- }))
- );
- }
- // Aggregate stats
- aggregateStats.total += result.stats.total;
- aggregateStats.resolved += result.stats.resolved;
- aggregateStats.unresolved += result.stats.unresolved;
- for (const [method, count] of Object.entries(result.stats.byMethod)) {
- aggregateStats.byMethod[method] = (aggregateStats.byMethod[method] || 0) + count;
- }
- processed += batch.length;
- onProgress?.(processed, total);
- // Yield so progress UI can render between batches
- await new Promise(resolve => setImmediate(resolve));
- // If nothing was resolved or removed in this batch, we'd loop forever
- // on the same rows. Break to avoid infinite loop.
- if (result.resolved.length === 0 && result.unresolved.length === batch.length) {
- break;
- }
- }
- // Dynamic-edge synthesis: now that all base `calls` edges are persisted,
- // synthesize observer/callback dispatch edges (dispatcher → registered
- // callbacks) that static parsing leaves out. Best-effort — never fail the
- // index on it. See docs/design/callback-edge-synthesis.md.
- try {
- aggregateStats.byMethod['callback-synthesis'] = synthesizeCallbackEdges(this.queries, this.context);
- } catch {
- // synthesis is additive and optional; ignore failures
- }
- return {
- resolved: [],
- unresolved: [],
- stats: aggregateStats,
- };
- }
- /**
- * Get detected frameworks
- */
- getDetectedFrameworks(): string[] {
- return this.frameworks.map((f) => f.name);
- }
- /**
- * Check if reference is to a built-in or external symbol
- */
- private isBuiltInOrExternal(ref: UnresolvedRef): boolean {
- const name = ref.referenceName;
- const isJsTs = ref.language === 'typescript' || ref.language === 'javascript'
- || ref.language === 'tsx' || ref.language === 'jsx';
- // JavaScript/TypeScript built-ins
- if (isJsTs && JS_BUILT_INS.has(name)) {
- return true;
- }
- // Common JS/TS library calls (console.log, Math.floor, JSON.parse)
- if (isJsTs && (name.startsWith('console.') || name.startsWith('Math.') || name.startsWith('JSON.'))) {
- return true;
- }
- // React hooks from React itself
- if (isJsTs && REACT_HOOKS.has(name)) {
- return true;
- }
- // Python built-ins (bare calls only — dotted calls like console.print are method calls)
- if (ref.language === 'python' && PYTHON_BUILT_INS.has(name)) {
- return true;
- }
- // Python built-in method calls (e.g., list.extend, dict.update)
- if (ref.language === 'python') {
- const dotIdx = name.indexOf('.');
- if (dotIdx > 0) {
- const receiver = name.substring(0, dotIdx);
- const method = name.substring(dotIdx + 1);
- // Filter calls on built-in types (list.append, dict.update, etc.)
- if (PYTHON_BUILT_IN_TYPES.has(receiver)) {
- return true;
- }
- // Filter built-in methods on non-class receivers
- // (e.g., items.append where items is a local list variable)
- // But allow if the capitalized receiver matches a known codebase class
- if (PYTHON_BUILT_IN_METHODS.has(method)) {
- const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1);
- if (!this.knownNames?.has(capitalized)) {
- return true;
- }
- }
- }
- // A bare name colliding with a builtin method (index, get, update, count…)
- // is only a builtin when NOTHING in the codebase declares it. A declared
- // symbol with that exact name — e.g. a Flask/FastAPI view `def index()` or
- // `def get()` — is a real reference target. Mirrors the knownNames guard on
- // the dotted branch above; without it, every handler named after a builtin
- // method silently loses its route→handler edge.
- if (PYTHON_BUILT_IN_METHODS.has(name) && !this.knownNames?.has(name)) {
- return true;
- }
- }
- // Go standard library packages — refs like "fmt.Println", "http.ListenAndServe", etc.
- if (ref.language === 'go') {
- const dotIdx = name.indexOf('.');
- if (dotIdx > 0) {
- const pkg = name.substring(0, dotIdx);
- if (GO_STDLIB_PACKAGES.has(pkg)) {
- return true;
- }
- }
- if (GO_BUILT_INS.has(name)) {
- return true;
- }
- }
- // Pascal/Delphi built-ins and standard library units
- if (ref.language === 'pascal') {
- if (PASCAL_UNIT_PREFIXES.some((p) => name.startsWith(p))) {
- return true;
- }
- if (PASCAL_BUILT_INS.has(name)) {
- return true;
- }
- }
- return false;
- }
- /**
- * Get file path from node ID
- */
- private getFilePathFromNodeId(nodeId: string): string {
- const node = this.queries.getNodeById(nodeId);
- return node?.filePath || '';
- }
- /**
- * Get language from node ID
- */
- private getLanguageFromNodeId(nodeId: string): UnresolvedRef['language'] {
- const node = this.queries.getNodeById(nodeId);
- return node?.language || 'unknown';
- }
- }
- /**
- * Create a reference resolver instance
- */
- export function createResolver(projectRoot: string, queries: QueryBuilder): ReferenceResolver {
- const resolver = new ReferenceResolver(projectRoot, queries);
- resolver.initialize();
- return resolver;
- }
|