utils.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566
  1. /**
  2. * CodeGraph Utilities
  3. *
  4. * Common utility functions for memory management, concurrency, batching,
  5. * and security validation.
  6. *
  7. * @module utils
  8. *
  9. * @example
  10. * ```typescript
  11. * import { Mutex, processInBatches, MemoryMonitor, validatePathWithinRoot } from 'codegraph';
  12. *
  13. * // Use mutex for concurrent safety
  14. * const mutex = new Mutex();
  15. * await mutex.withLock(async () => {
  16. * await performCriticalOperation();
  17. * });
  18. *
  19. * // Process items in batches to manage memory
  20. * const results = await processInBatches(items, 100, async (item) => {
  21. * return await processItem(item);
  22. * });
  23. *
  24. * // Monitor memory usage
  25. * const monitor = new MemoryMonitor(512, (usage) => {
  26. * console.warn(`Memory usage exceeded 512MB: ${usage / 1024 / 1024}MB`);
  27. * });
  28. * monitor.start();
  29. * ```
  30. */
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
  33. // ============================================================
  34. // SECURITY UTILITIES
  35. // ============================================================
  36. /**
  37. * Sensitive system directories that should never be used as project roots.
  38. * Checked on all platforms; non-applicable paths are harmlessly skipped.
  39. */
  40. const SENSITIVE_PATHS = new Set([
  41. '/', '/etc', '/usr', '/bin', '/sbin', '/var', '/tmp', '/dev', '/proc', '/sys',
  42. '/root', '/boot', '/lib', '/lib64', '/opt',
  43. 'c:\\', 'c:\\windows', 'c:\\windows\\system32',
  44. ]);
  45. /**
  46. * Validate that a resolved file path stays within the project root.
  47. * Prevents path traversal attacks (e.g. node.filePath = "../../etc/passwd").
  48. *
  49. * @param projectRoot - The project root directory
  50. * @param filePath - The relative file path to validate
  51. * @returns The resolved absolute path, or null if it escapes the root
  52. */
  53. export function validatePathWithinRoot(projectRoot: string, filePath: string): string | null {
  54. const resolved = path.resolve(projectRoot, filePath);
  55. const normalizedRoot = path.resolve(projectRoot);
  56. if (!resolved.startsWith(normalizedRoot + path.sep) && resolved !== normalizedRoot) {
  57. return null;
  58. }
  59. return resolved;
  60. }
  61. /**
  62. * Validate that a path is a safe project root directory.
  63. *
  64. * Rejects sensitive system directories and ensures the path is
  65. * a real, existing directory. Used at MCP and API entry points
  66. * to prevent arbitrary directory access.
  67. *
  68. * @param dirPath - The path to validate
  69. * @returns An error message if invalid, or null if valid
  70. */
  71. export function validateProjectPath(dirPath: string): string | null {
  72. const resolved = path.resolve(dirPath);
  73. // Block sensitive system directories
  74. if (SENSITIVE_PATHS.has(resolved) || SENSITIVE_PATHS.has(resolved.toLowerCase())) {
  75. return `Refusing to operate on sensitive system directory: ${resolved}`;
  76. }
  77. // Also block common sensitive home subdirectories
  78. const homeDir = require('os').homedir();
  79. const sensitiveHomeDirs = ['.ssh', '.gnupg', '.aws', '.config'];
  80. for (const dir of sensitiveHomeDirs) {
  81. const sensitivePath = path.join(homeDir, dir);
  82. if (resolved === sensitivePath || resolved.startsWith(sensitivePath + path.sep)) {
  83. return `Refusing to operate on sensitive directory: ${resolved}`;
  84. }
  85. }
  86. // Verify it's a real directory
  87. try {
  88. const stats = fs.statSync(resolved);
  89. if (!stats.isDirectory()) {
  90. return `Path is not a directory: ${resolved}`;
  91. }
  92. } catch {
  93. return `Path does not exist or is not accessible: ${resolved}`;
  94. }
  95. return null;
  96. }
  97. /**
  98. * Check if a file path resolves to a location within the given root directory.
  99. *
  100. * Prevents path traversal attacks by ensuring the resolved absolute path
  101. * starts with the resolved root path. Handles '..' sequences, symlink-like
  102. * relative paths, and platform-specific separators.
  103. *
  104. * @param filePath - The path to check (can be relative or absolute)
  105. * @param rootDir - The root directory that filePath must stay within
  106. * @returns true if filePath resolves to a location within rootDir
  107. */
  108. export function isPathWithinRoot(filePath: string, rootDir: string): boolean {
  109. const resolvedPath = path.resolve(rootDir, filePath);
  110. const resolvedRoot = path.resolve(rootDir);
  111. return resolvedPath.startsWith(resolvedRoot + path.sep) || resolvedPath === resolvedRoot;
  112. }
  113. /**
  114. * Like isPathWithinRoot but also resolves symlinks via fs.realpathSync.
  115. *
  116. * This catches symlink escapes where the logical path appears to be within
  117. * root but the real path on disk points elsewhere. Falls back to logical
  118. * path checking if realpath resolution fails (e.g. broken symlink).
  119. */
  120. export function isPathWithinRootReal(filePath: string, rootDir: string): boolean {
  121. // First do the cheap logical check
  122. if (!isPathWithinRoot(filePath, rootDir)) {
  123. return false;
  124. }
  125. // Then verify with realpath to catch symlink escapes
  126. try {
  127. const realPath = fs.realpathSync(path.resolve(rootDir, filePath));
  128. const realRoot = fs.realpathSync(rootDir);
  129. return realPath.startsWith(realRoot + path.sep) || realPath === realRoot;
  130. } catch {
  131. // If realpath fails (broken symlink, permissions), fall back to logical check
  132. return true;
  133. }
  134. }
  135. /**
  136. * Safely parse JSON with a fallback value.
  137. * Prevents crashes from corrupted database metadata.
  138. */
  139. export function safeJsonParse<T>(value: string, fallback: T): T {
  140. try {
  141. return JSON.parse(value);
  142. } catch {
  143. return fallback;
  144. }
  145. }
  146. /**
  147. * Clamp a numeric value to a range.
  148. * Used to enforce sane limits on MCP tool inputs.
  149. */
  150. export function clamp(value: number, min: number, max: number): number {
  151. return Math.max(min, Math.min(max, value));
  152. }
  153. /**
  154. * Normalize a file path to use forward slashes.
  155. * Fixes Windows backslash paths so glob matching works consistently.
  156. */
  157. export function normalizePath(filePath: string): string {
  158. return filePath.replace(/\\/g, '/');
  159. }
  160. /**
  161. * Cross-process file lock using a lock file with PID tracking.
  162. *
  163. * Prevents multiple processes (e.g., git hooks, CLI, MCP server) from
  164. * writing to the same database simultaneously.
  165. */
  166. export class FileLock {
  167. private lockPath: string;
  168. private held = false;
  169. /** Locks older than this are considered stale regardless of PID status */
  170. private static readonly STALE_TIMEOUT_MS = 2 * 60 * 1000; // 2 minutes
  171. constructor(lockPath: string) {
  172. this.lockPath = lockPath;
  173. }
  174. /**
  175. * Acquire the lock. Throws if the lock is held by another live process.
  176. */
  177. acquire(): void {
  178. // Check for existing lock
  179. if (fs.existsSync(this.lockPath)) {
  180. try {
  181. const content = fs.readFileSync(this.lockPath, 'utf-8').trim();
  182. const pid = parseInt(content, 10);
  183. const stat = fs.statSync(this.lockPath);
  184. const lockAge = Date.now() - stat.mtimeMs;
  185. // Treat locks older than the timeout as stale, regardless of PID
  186. if (lockAge < FileLock.STALE_TIMEOUT_MS && !isNaN(pid) && this.isProcessAlive(pid)) {
  187. throw new Error(
  188. `CodeGraph database is locked by another process (PID ${pid}). ` +
  189. `If this is stale, run 'codegraph unlock' or delete ${this.lockPath}`
  190. );
  191. }
  192. // Stale lock (dead process or timed out) - remove it
  193. fs.unlinkSync(this.lockPath);
  194. } catch (err) {
  195. if (err instanceof Error && err.message.includes('locked by another')) {
  196. throw err;
  197. }
  198. // Other errors reading lock file - try to remove it
  199. try { fs.unlinkSync(this.lockPath); } catch { /* ignore */ }
  200. }
  201. }
  202. // Write our PID to the lock file using exclusive create flag
  203. try {
  204. fs.writeFileSync(this.lockPath, String(process.pid), { flag: 'wx' });
  205. this.held = true;
  206. } catch (err: any) {
  207. if (err.code === 'EEXIST') {
  208. // Race condition: another process grabbed the lock between our check and write
  209. throw new Error(
  210. 'CodeGraph database is locked by another process. ' +
  211. `If this is stale, run 'codegraph unlock' or delete ${this.lockPath}`
  212. );
  213. }
  214. throw err;
  215. }
  216. }
  217. /**
  218. * Release the lock
  219. */
  220. release(): void {
  221. if (!this.held) return;
  222. try {
  223. // Only remove if we still own it (check PID)
  224. const content = fs.readFileSync(this.lockPath, 'utf-8').trim();
  225. if (parseInt(content, 10) === process.pid) {
  226. fs.unlinkSync(this.lockPath);
  227. }
  228. } catch {
  229. // Lock file already gone - that's fine
  230. }
  231. this.held = false;
  232. }
  233. /**
  234. * Execute a function while holding the lock
  235. */
  236. withLock<T>(fn: () => T): T {
  237. this.acquire();
  238. try {
  239. return fn();
  240. } finally {
  241. this.release();
  242. }
  243. }
  244. /**
  245. * Execute an async function while holding the lock
  246. */
  247. async withLockAsync<T>(fn: () => Promise<T>): Promise<T> {
  248. this.acquire();
  249. try {
  250. return await fn();
  251. } finally {
  252. this.release();
  253. }
  254. }
  255. /**
  256. * Check if a process is still running
  257. */
  258. private isProcessAlive(pid: number): boolean {
  259. try {
  260. process.kill(pid, 0);
  261. return true;
  262. } catch {
  263. return false;
  264. }
  265. }
  266. }
  267. /**
  268. * Process items in batches to manage memory
  269. *
  270. * @param items - Array of items to process
  271. * @param batchSize - Number of items per batch
  272. * @param processor - Function to process each item
  273. * @param onBatchComplete - Optional callback after each batch
  274. * @returns Array of results
  275. */
  276. export async function processInBatches<T, R>(
  277. items: T[],
  278. batchSize: number,
  279. processor: (item: T, index: number) => Promise<R>,
  280. onBatchComplete?: (completed: number, total: number) => void
  281. ): Promise<R[]> {
  282. const results: R[] = [];
  283. for (let i = 0; i < items.length; i += batchSize) {
  284. const batch = items.slice(i, Math.min(i + batchSize, items.length));
  285. const batchResults = await Promise.all(
  286. batch.map((item, idx) => processor(item, i + idx))
  287. );
  288. results.push(...batchResults);
  289. if (onBatchComplete) {
  290. onBatchComplete(Math.min(i + batchSize, items.length), items.length);
  291. }
  292. // Allow GC between batches
  293. if (global.gc) {
  294. global.gc();
  295. }
  296. }
  297. return results;
  298. }
  299. /**
  300. * Simple mutex lock for preventing concurrent operations
  301. */
  302. export class Mutex {
  303. private locked = false;
  304. private waitQueue: Array<() => void> = [];
  305. /**
  306. * Acquire the lock
  307. *
  308. * @returns A release function to call when done
  309. */
  310. async acquire(): Promise<() => void> {
  311. while (this.locked) {
  312. await new Promise<void>((resolve) => {
  313. this.waitQueue.push(resolve);
  314. });
  315. }
  316. this.locked = true;
  317. return () => {
  318. this.locked = false;
  319. const next = this.waitQueue.shift();
  320. if (next) {
  321. next();
  322. }
  323. };
  324. }
  325. /**
  326. * Execute a function while holding the lock
  327. */
  328. async withLock<T>(fn: () => Promise<T> | T): Promise<T> {
  329. const release = await this.acquire();
  330. try {
  331. return await fn();
  332. } finally {
  333. release();
  334. }
  335. }
  336. /**
  337. * Check if the lock is currently held
  338. */
  339. isLocked(): boolean {
  340. return this.locked;
  341. }
  342. }
  343. /**
  344. * Chunked file reader for large files
  345. *
  346. * Reads a file in chunks to avoid loading entire file into memory.
  347. */
  348. export async function* readFileInChunks(
  349. filePath: string,
  350. chunkSize: number = 64 * 1024
  351. ): AsyncGenerator<string, void, undefined> {
  352. const fs = await import('fs');
  353. const fd = fs.openSync(filePath, 'r');
  354. const buffer = Buffer.alloc(chunkSize);
  355. try {
  356. let bytesRead: number;
  357. while ((bytesRead = fs.readSync(fd, buffer, 0, chunkSize, null)) > 0) {
  358. yield buffer.toString('utf-8', 0, bytesRead);
  359. }
  360. } finally {
  361. fs.closeSync(fd);
  362. }
  363. }
  364. /**
  365. * Debounce a function
  366. *
  367. * @param fn - Function to debounce
  368. * @param delay - Delay in milliseconds
  369. * @returns Debounced function
  370. */
  371. export function debounce<T extends (...args: unknown[]) => unknown>(
  372. fn: T,
  373. delay: number
  374. ): (...args: Parameters<T>) => void {
  375. let timeoutId: ReturnType<typeof setTimeout> | null = null;
  376. return (...args: Parameters<T>) => {
  377. if (timeoutId) {
  378. clearTimeout(timeoutId);
  379. }
  380. timeoutId = setTimeout(() => {
  381. fn(...args);
  382. timeoutId = null;
  383. }, delay);
  384. };
  385. }
  386. /**
  387. * Throttle a function
  388. *
  389. * @param fn - Function to throttle
  390. * @param limit - Minimum time between calls in milliseconds
  391. * @returns Throttled function
  392. */
  393. export function throttle<T extends (...args: unknown[]) => unknown>(
  394. fn: T,
  395. limit: number
  396. ): (...args: Parameters<T>) => void {
  397. let lastCall = 0;
  398. let timeoutId: ReturnType<typeof setTimeout> | null = null;
  399. return (...args: Parameters<T>) => {
  400. const now = Date.now();
  401. const remaining = limit - (now - lastCall);
  402. if (remaining <= 0) {
  403. if (timeoutId) {
  404. clearTimeout(timeoutId);
  405. timeoutId = null;
  406. }
  407. lastCall = now;
  408. fn(...args);
  409. } else if (!timeoutId) {
  410. timeoutId = setTimeout(() => {
  411. lastCall = Date.now();
  412. timeoutId = null;
  413. fn(...args);
  414. }, remaining);
  415. }
  416. };
  417. }
  418. /**
  419. * Estimate memory usage of an object (rough approximation)
  420. *
  421. * @param obj - Object to measure
  422. * @returns Approximate size in bytes
  423. */
  424. export function estimateSize(obj: unknown): number {
  425. const seen = new WeakSet();
  426. function sizeOf(value: unknown): number {
  427. if (value === null || value === undefined) {
  428. return 0;
  429. }
  430. switch (typeof value) {
  431. case 'boolean':
  432. return 4;
  433. case 'number':
  434. return 8;
  435. case 'string':
  436. return 2 * (value as string).length;
  437. case 'object':
  438. if (seen.has(value as object)) {
  439. return 0;
  440. }
  441. seen.add(value as object);
  442. if (Array.isArray(value)) {
  443. return value.reduce((acc: number, item) => acc + sizeOf(item), 0);
  444. }
  445. return Object.entries(value as object).reduce(
  446. (acc, [key, val]) => acc + sizeOf(key) + sizeOf(val),
  447. 0
  448. );
  449. default:
  450. return 0;
  451. }
  452. }
  453. return sizeOf(obj);
  454. }
  455. /**
  456. * Memory monitor for tracking usage during operations
  457. */
  458. export class MemoryMonitor {
  459. private checkInterval: ReturnType<typeof setInterval> | null = null;
  460. private peakUsage = 0;
  461. private threshold: number;
  462. private onThresholdExceeded?: (usage: number) => void;
  463. constructor(
  464. thresholdMB: number = 500,
  465. onThresholdExceeded?: (usage: number) => void
  466. ) {
  467. this.threshold = thresholdMB * 1024 * 1024;
  468. this.onThresholdExceeded = onThresholdExceeded;
  469. }
  470. /**
  471. * Start monitoring memory usage
  472. */
  473. start(intervalMs: number = 1000): void {
  474. this.stop();
  475. this.peakUsage = 0;
  476. this.checkInterval = setInterval(() => {
  477. const usage = process.memoryUsage().heapUsed;
  478. if (usage > this.peakUsage) {
  479. this.peakUsage = usage;
  480. }
  481. if (usage > this.threshold && this.onThresholdExceeded) {
  482. this.onThresholdExceeded(usage);
  483. }
  484. }, intervalMs);
  485. }
  486. /**
  487. * Stop monitoring
  488. */
  489. stop(): void {
  490. if (this.checkInterval) {
  491. clearInterval(this.checkInterval);
  492. this.checkInterval = null;
  493. }
  494. }
  495. /**
  496. * Get peak memory usage in bytes
  497. */
  498. getPeakUsage(): number {
  499. return this.peakUsage;
  500. }
  501. /**
  502. * Get current memory usage in bytes
  503. */
  504. getCurrentUsage(): number {
  505. return process.memoryUsage().heapUsed;
  506. }
  507. }