utils.ts 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /**
  2. * CodeGraph Utilities
  3. *
  4. * Common utility functions for memory management, concurrency, batching,
  5. * and security validation.
  6. *
  7. * @module utils
  8. *
  9. * @example
  10. * ```typescript
  11. * import { Mutex, processInBatches, MemoryMonitor, validatePathWithinRoot } from 'codegraph';
  12. *
  13. * // Use mutex for concurrent safety
  14. * const mutex = new Mutex();
  15. * await mutex.withLock(async () => {
  16. * await performCriticalOperation();
  17. * });
  18. *
  19. * // Process items in batches to manage memory
  20. * const results = await processInBatches(items, 100, async (item) => {
  21. * return await processItem(item);
  22. * });
  23. *
  24. * // Monitor memory usage
  25. * const monitor = new MemoryMonitor(512, (usage) => {
  26. * console.warn(`Memory usage exceeded 512MB: ${usage / 1024 / 1024}MB`);
  27. * });
  28. * monitor.start();
  29. * ```
  30. */
  31. import * as path from 'path';
  32. import * as fs from 'fs';
  33. // ============================================================
  34. // SECURITY UTILITIES
  35. // ============================================================
  /**
   * Resolve `filePath` against `projectRoot` and confirm the result stays
   * inside the root.
   *
   * Guards against path traversal (e.g. `node.filePath = "../../etc/passwd"`).
   * The check is purely lexical: both paths are normalized with `path.resolve`
   * and compared; symbolic links are neither followed nor inspected.
   *
   * @param projectRoot - The project root directory
   * @param filePath - The file path to validate, interpreted relative to the root
   * @returns The resolved absolute path, or `null` if it escapes the root
   */
  export function validatePathWithinRoot(projectRoot: string, filePath: string): string | null {
    const normalizedRoot = path.resolve(projectRoot);
    const resolved = path.resolve(normalizedRoot, filePath);

    // Decide containment from the relative path instead of a string-prefix
    // test: prefixing with `root + path.sep` wrongly rejects every path when
    // the root is itself a filesystem root ("/" + "/" === "//").
    const relative = path.relative(normalizedRoot, resolved);
    const escapesRoot =
      relative === '..' ||
      relative.startsWith('..' + path.sep) ||
      // On Windows a target on another drive yields an absolute "relative" path.
      path.isAbsolute(relative);

    return escapesRoot ? null : resolved;
  }
  /**
   * Parse a JSON string, returning `fallback` instead of throwing when the
   * input is not valid JSON (e.g. corrupted database metadata).
   *
   * The parsed value is returned as-is; it is not validated against `T`.
   *
   * @param value - JSON text to parse
   * @param fallback - Value returned when parsing throws
   * @returns The parsed value, or `fallback` on a parse error
   */
  export function safeJsonParse<T>(value: string, fallback: T): T {
    let parsed: T;
    try {
      parsed = JSON.parse(value);
    } catch {
      return fallback;
    }
    return parsed;
  }
  /**
   * Restrict a number to the inclusive range `[min, max]`.
   * Used to enforce sane limits on MCP tool inputs.
   *
   * @param value - The number to restrict
   * @param min - Lower bound
   * @param max - Upper bound
   * @returns `value` limited to the range
   */
  export function clamp(value: number, min: number, max: number): number {
    // Apply the upper bound first, then the lower bound.
    const cappedAbove = Math.min(value, max);
    const cappedBelow = Math.max(cappedAbove, min);
    return cappedBelow;
  }
  /**
   * Convert every backslash in a path to a forward slash.
   * Makes Windows-style paths match globs the same way as POSIX paths.
   *
   * @param filePath - Path that may contain backslashes
   * @returns The path with all `\` replaced by `/`
   */
  export function normalizePath(filePath: string): string {
    const segments = filePath.split('\\');
    return segments.join('/');
  }
  /**
   * Cross-process file lock using lock files.
   * Prevents concurrent database writes from CLI, MCP server, and git hooks.
   *
   * The lock is a file named `<resourcePath>.lock` created with the exclusive
   * `wx` flag and containing the PID of the creating process.
   */
  export class FileLock {
    /** Delay between acquisition attempts while the lock is unavailable. */
    private static readonly RETRY_DELAY_MS = 100;

    private lockPath: string;
    private acquired = false;

    /**
     * @param resourcePath - Path of the resource to guard; `.lock` is appended
     */
    constructor(resourcePath: string) {
      this.lockPath = resourcePath + '.lock';
    }

    /**
     * Acquire the file lock, retrying until `timeoutMs` has elapsed.
     *
     * At least one attempt is always made, so `acquire(0)` works as a
     * non-blocking try-lock. A lock file whose mtime is older than
     * `staleLockMs` is removed and the lock is retried immediately.
     *
     * @param timeoutMs - Maximum time to keep retrying, in milliseconds
     * @param staleLockMs - Age after which an existing lock file is considered stale
     * @returns `true` if the lock was acquired, `false` on timeout
     */
    async acquire(timeoutMs: number = 10000, staleLockMs: number = 30000): Promise<boolean> {
      const deadline = Date.now() + timeoutMs;

      for (;;) {
        const failure = this.tryCreateLockFile();
        if (failure === null) {
          return true;
        }

        // Only EEXIST means "someone else holds the lock". If that lock turns
        // out to be stale (or has just vanished), retry once right away.
        if (
          failure === 'EEXIST' &&
          this.clearStaleLock(staleLockMs) &&
          this.tryCreateLockFile() === null
        ) {
          return true;
        }

        // Every other path yields to the event loop before retrying. Without
        // this, a persistent failure (missing directory, permissions, a stale
        // lock that cannot be removed) would spin synchronously until timeout.
        const remaining = deadline - Date.now();
        if (!(remaining > 0)) {
          return false;
        }
        await new Promise<void>((resolve) =>
          setTimeout(resolve, Math.min(FileLock.RETRY_DELAY_MS, remaining))
        );
      }
    }

    /**
     * Release the file lock. Does nothing if this instance does not hold it.
     */
    release(): void {
      if (this.acquired) {
        try {
          fs.unlinkSync(this.lockPath);
        } catch {
          // Lock file already removed - that's fine
        }
        this.acquired = false;
      }
    }

    /**
     * Try to create the lock file exclusively.
     *
     * @returns `null` on success, otherwise the error code of the failure
     */
    private tryCreateLockFile(): string | null {
      try {
        fs.writeFileSync(this.lockPath, String(process.pid), { flag: 'wx' });
        this.acquired = true;
        return null;
      } catch (err: unknown) {
        return FileLock.errorCode(err);
      }
    }

    /**
     * Remove the existing lock file if it is older than `staleLockMs`.
     *
     * @returns `true` when the lock file is gone afterwards (removed here, or
     * already deleted by its holder), `false` when it is still present
     */
    private clearStaleLock(staleLockMs: number): boolean {
      try {
        const { mtimeMs } = fs.statSync(this.lockPath);
        if (Date.now() - mtimeMs <= staleLockMs) {
          return false;
        }
        fs.unlinkSync(this.lockPath);
        return true;
      } catch (err: unknown) {
        // ENOENT: the file disappeared between the create attempt and now.
        return FileLock.errorCode(err) === 'ENOENT';
      }
    }

    /** Extract a string `code` property from a thrown value, or `'UNKNOWN'`. */
    private static errorCode(err: unknown): string {
      if (typeof err === 'object' && err !== null && 'code' in err) {
        const code = (err as { code?: unknown }).code;
        if (typeof code === 'string') {
          return code;
        }
      }
      return 'UNKNOWN';
    }
  }
  /**
   * Process items in batches to manage memory.
   *
   * Items within a batch are processed concurrently via `Promise.all`; batches
   * run one after another. Results are returned in the same order as `items`.
   * If any processor call rejects, the returned promise rejects with that error.
   *
   * @param items - Array of items to process
   * @param batchSize - Number of items per batch; must be >= 1 (fractions are floored)
   * @param processor - Function to process each item; receives the item and its index in `items`
   * @param onBatchComplete - Optional callback after each batch with (completed, total) counts
   * @returns Array of results
   * @throws RangeError if `batchSize` is not a number >= 1
   */
  export async function processInBatches<T, R>(
    items: readonly T[],
    batchSize: number,
    processor: (item: T, index: number) => Promise<R>,
    onBatchComplete?: (completed: number, total: number) => void
  ): Promise<R[]> {
    // A zero or negative size would never advance the loop, and NaN would
    // silently skip all work, so reject those inputs up front.
    if (!(batchSize >= 1)) {
      throw new RangeError(`batchSize must be a number >= 1, received ${batchSize}`);
    }
    const step = Math.floor(batchSize);
    const total = items.length;
    const results: R[] = [];

    for (let start = 0; start < total; start += step) {
      const end = Math.min(start + step, total);
      const batchResults = await Promise.all(
        items.slice(start, end).map((item, offset) => processor(item, start + offset))
      );

      // Append one by one: spreading a very large batch into push() can exceed
      // the engine's argument-count limit.
      for (const result of batchResults) {
        results.push(result);
      }

      if (onBatchComplete) {
        onBatchComplete(end, total);
      }

      // Allow GC between batches (global.gc exists only when Node runs with --expose-gc)
      if (global.gc) {
        global.gc();
      }
    }
    return results;
  }
  /**
   * Simple promise-based mutex for preventing concurrent async operations.
   *
   * Waiters are served in FIFO order: releasing the lock hands ownership
   * directly to the longest-waiting caller.
   */
  export class Mutex {
    private locked = false;
    private waitQueue: Array<() => void> = [];

    /**
     * Acquire the lock, waiting if it is currently held.
     *
     * @returns A release function to call when done. Calling it more than once
     * has no additional effect.
     */
    async acquire(): Promise<() => void> {
      if (this.locked) {
        // Wait for the current holder to hand the lock over. `locked` stays
        // true across the hand-off, so a caller arriving between the release
        // and this waiter resuming cannot jump the queue.
        await new Promise<void>((resolve) => {
          this.waitQueue.push(resolve);
        });
      } else {
        this.locked = true;
      }

      let released = false;
      return () => {
        // Ignore repeated calls: a stale release must not unlock the mutex
        // while a later holder owns it.
        if (released) {
          return;
        }
        released = true;

        const next = this.waitQueue.shift();
        if (next) {
          next();
        } else {
          this.locked = false;
        }
      };
    }

    /**
     * Execute a function while holding the lock.
     * The lock is released whether `fn` returns or throws.
     *
     * @param fn - Function to run under the lock
     * @returns The value returned (or resolved) by `fn`
     */
    async withLock<T>(fn: () => Promise<T> | T): Promise<T> {
      const release = await this.acquire();
      try {
        return await fn();
      } finally {
        release();
      }
    }

    /**
     * Check if the lock is currently held
     */
    isLocked(): boolean {
      return this.locked;
    }
  }
  /**
   * Chunked file reader for large files.
   *
   * Reads a file in fixed-size byte chunks and yields the decoded UTF-8 text,
   * so the whole file is never held in memory at once. A multi-byte character
   * that straddles a chunk boundary is carried over and emitted intact with
   * the next chunk, so yielded strings may hold slightly fewer or more bytes
   * than `chunkSize`. Empty strings are never yielded.
   *
   * The file descriptor is closed when iteration finishes or is abandoned.
   *
   * @param filePath - Path of the file to read
   * @param chunkSize - Number of bytes to read per chunk
   */
  export async function* readFileInChunks(
    filePath: string,
    chunkSize: number = 64 * 1024
  ): AsyncGenerator<string, void, undefined> {
    const fs = await import('fs');
    const { StringDecoder } = await import('string_decoder');

    // The decoder buffers incomplete multi-byte sequences between chunks;
    // decoding each chunk independently would turn them into U+FFFD.
    const decoder = new StringDecoder('utf8');

    // Allocate before opening so an invalid chunkSize cannot leak a descriptor.
    const buffer = Buffer.alloc(chunkSize);
    const fd = fs.openSync(filePath, 'r');
    try {
      let bytesRead: number;
      while ((bytesRead = fs.readSync(fd, buffer, 0, chunkSize, null)) > 0) {
        const text = decoder.write(buffer.subarray(0, bytesRead));
        if (text.length > 0) {
          yield text;
        }
      }
      // Flush whatever is left (only non-empty if the file ends mid-character).
      const tail = decoder.end();
      if (tail.length > 0) {
        yield tail;
      }
    } finally {
      fs.closeSync(fd);
    }
  }
  /**
   * Debounce a function.
   *
   * Each call cancels the previously scheduled invocation and schedules a new
   * one, so `fn` runs once, `delay` ms after the most recent call, with that
   * call's arguments. The return value of `fn` is discarded.
   *
   * @param fn - Function to debounce
   * @param delay - Delay in milliseconds
   * @returns Debounced function
   */
  // `any[]` (not `unknown[]`) is required in the constraint so that functions
  // with typed parameters are accepted under `strictFunctionTypes`.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  export function debounce<T extends (...args: any[]) => unknown>(
    fn: T,
    delay: number
  ): (...args: Parameters<T>) => void {
    let timeoutId: ReturnType<typeof setTimeout> | null = null;

    return (...args: Parameters<T>): void => {
      if (timeoutId !== null) {
        clearTimeout(timeoutId);
      }
      timeoutId = setTimeout(() => {
        // Clear the handle BEFORE invoking fn: if fn calls the debounced
        // function again, the timer it schedules must stay tracked so that a
        // later call can still cancel it.
        timeoutId = null;
        fn(...args);
      }, delay);
    };
  }
  /**
   * Throttle a function.
   *
   * The first call runs immediately. Calls arriving less than `limit` ms after
   * the last invocation are coalesced into a single trailing invocation that
   * fires once the limit has elapsed, using the arguments of the most recent
   * suppressed call. The return value of `fn` is discarded.
   *
   * @param fn - Function to throttle
   * @param limit - Minimum time between calls in milliseconds
   * @returns Throttled function
   */
  // `any[]` (not `unknown[]`) is required in the constraint so that functions
  // with typed parameters are accepted under `strictFunctionTypes`.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  export function throttle<T extends (...args: any[]) => unknown>(
    fn: T,
    limit: number
  ): (...args: Parameters<T>) => void {
    let lastCall = 0;
    let timeoutId: ReturnType<typeof setTimeout> | null = null;
    // Arguments of the latest suppressed call, consumed by the trailing timer.
    let pendingArgs: Parameters<T> | null = null;

    return (...args: Parameters<T>): void => {
      const now = Date.now();
      const remaining = limit - (now - lastCall);

      if (remaining <= 0) {
        // Enough time has passed: run now and drop any scheduled trailing call.
        if (timeoutId !== null) {
          clearTimeout(timeoutId);
          timeoutId = null;
        }
        pendingArgs = null;
        lastCall = now;
        fn(...args);
        return;
      }

      // Inside the throttle window: always remember the newest arguments so
      // the trailing call reflects the latest state, not the first one seen.
      pendingArgs = args;
      if (timeoutId === null) {
        timeoutId = setTimeout(() => {
          const trailingArgs = pendingArgs;
          pendingArgs = null;
          timeoutId = null;
          lastCall = Date.now();
          if (trailingArgs !== null) {
            fn(...trailingArgs);
          }
        }, remaining);
      }
    };
  }
  /**
   * Estimate memory usage of a value (rough approximation).
   *
   * Counting rules:
   * - boolean: 4 bytes; number: 8 bytes; string: 2 bytes per UTF-16 code unit
   * - plain objects: own enumerable string keys (as strings) plus their values
   * - arrays, Sets: the sum of their elements; Maps: keys plus values
   * - ArrayBuffers and typed arrays / Buffers: their `byteLength`
   * - null, undefined, functions, symbols and bigints: 0
   *
   * Each object is counted once, so shared references and cycles are safe.
   * The traversal is iterative, so deeply nested data does not overflow the
   * call stack.
   *
   * @param obj - Value to measure
   * @returns Approximate size in bytes
   */
  export function estimateSize(obj: unknown): number {
    const seen = new WeakSet<object>();
    // Containers discovered but not yet expanded.
    const pending: object[] = [];
    let total = 0;

    // Add a primitive's size directly, or queue an unseen object for expansion.
    const visit = (value: unknown): void => {
      switch (typeof value) {
        case 'boolean':
          total += 4;
          return;
        case 'number':
          total += 8;
          return;
        case 'string':
          total += 2 * value.length;
          return;
        case 'object':
          if (value !== null && !seen.has(value)) {
            seen.add(value);
            pending.push(value);
          }
          return;
        default:
          return;
      }
    };

    visit(obj);

    for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
      if (Array.isArray(current)) {
        for (const item of current) {
          visit(item);
        }
      } else if (ArrayBuffer.isView(current) || current instanceof ArrayBuffer) {
        // Binary data: use the real byte length rather than enumerating indices.
        total += current.byteLength;
      } else if (current instanceof Map) {
        for (const [key, val] of current) {
          visit(key);
          visit(val);
        }
      } else if (current instanceof Set) {
        for (const member of current) {
          visit(member);
        }
      } else {
        for (const [key, val] of Object.entries(current)) {
          total += 2 * key.length;
          visit(val);
        }
      }
    }

    return total;
  }
  /**
   * Memory monitor for tracking heap usage during operations.
   *
   * While running, it samples `process.memoryUsage().heapUsed` on a fixed
   * interval, remembers the highest value seen, and invokes an optional
   * callback on every interval sample that exceeds the threshold. Samples are
   * also taken when monitoring starts and stops, so operations shorter than
   * the interval still report a meaningful peak.
   *
   * The interval timer stays active until `stop()` is called.
   */
  export class MemoryMonitor {
    private checkInterval: ReturnType<typeof setInterval> | null = null;
    private peakUsage = 0;
    private threshold: number;
    private onThresholdExceeded?: (usage: number) => void;

    /**
     * @param thresholdMB - Heap usage threshold in megabytes
     * @param onThresholdExceeded - Called with the current usage in bytes on
     * each interval sample above the threshold
     */
    constructor(
      thresholdMB: number = 500,
      onThresholdExceeded?: (usage: number) => void
    ) {
      this.threshold = thresholdMB * 1024 * 1024;
      this.onThresholdExceeded = onThresholdExceeded;
    }

    /**
     * Start monitoring memory usage.
     * Restarts monitoring (and resets the peak) if already running.
     *
     * @param intervalMs - Time between samples in milliseconds
     */
    start(intervalMs: number = 1000): void {
      this.stop();
      this.peakUsage = 0;

      // Baseline sample so the peak is never 0 while monitoring is active.
      // The threshold callback is only triggered from interval samples.
      this.recordSample();

      this.checkInterval = setInterval(() => {
        const usage = this.recordSample();
        if (usage > this.threshold && this.onThresholdExceeded) {
          this.onThresholdExceeded(usage);
        }
      }, intervalMs);
    }

    /**
     * Stop monitoring. Does nothing if monitoring is not running.
     */
    stop(): void {
      if (this.checkInterval !== null) {
        clearInterval(this.checkInterval);
        this.checkInterval = null;
        // Final sample: captures usage at the end of the monitored operation.
        this.recordSample();
      }
    }

    /**
     * Get peak memory usage in bytes observed since the last `start()`
     * (0 if monitoring was never started).
     */
    getPeakUsage(): number {
      return this.peakUsage;
    }

    /**
     * Get current memory usage in bytes
     */
    getCurrentUsage(): number {
      return process.memoryUsage().heapUsed;
    }

    /** Read current heap usage, fold it into the peak, and return it. */
    private recordSample(): number {
      const usage = process.memoryUsage().heapUsed;
      if (usage > this.peakUsage) {
        this.peakUsage = usage;
      }
      return usage;
    }
  }