Browse Source

Security hardening: path validation, input clamping, safe JSON, file locking

Implements security improvements inspired by PR #16 (credit: MO2k4):

- Add validatePathWithinRoot() to prevent path traversal attacks in
  extraction and context building
- Clamp MCP tool inputs (limit, depth, maxDepth) to sane ranges
- Use atomic writes (temp file + rename) for config saves
- Add symlink cycle detection in directory scanning to prevent infinite loops
- Replace all JSON.parse calls in db/queries.ts with safeJsonParse fallbacks
  to handle corrupted database metadata gracefully
- Add cross-process FileLock for DB write operations (indexAll, indexFiles,
  sync) to prevent concurrent writes from CLI, MCP server, and git hooks
- Remove unused path import from context/index.ts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Colby McHenry 4 tháng trước cách đây
mục cha
commit
932c567d18
7 tập tin đã thay đổi với 257 bổ sung55 xóa
  1. 5 1
      src/config.ts
  2. 3 3
      src/context/index.ts
  3. 7 6
      src/db/queries.ts
  4. 59 1
      src/extraction/index.ts
  5. 67 37
      src/index.ts
  6. 6 5
      src/mcp/tools.ts
  7. 110 2
      src/utils.ts

+ 5 - 1
src/config.ts

@@ -154,7 +154,11 @@ export function saveConfig(projectRoot: string, config: CodeGraphConfig): void {
   delete (toSave as Partial<CodeGraphConfig>).rootDir;
 
   const content = JSON.stringify(toSave, null, 2);
-  fs.writeFileSync(configPath, content, 'utf-8');
+
+  // Atomic write: write to temp file then rename to prevent partial/corrupt configs
+  const tmpPath = configPath + '.tmp';
+  fs.writeFileSync(tmpPath, content, 'utf-8');
+  fs.renameSync(tmpPath, configPath);
 }
 
 /**

+ 3 - 3
src/context/index.ts

@@ -6,7 +6,6 @@
  */
 
 import * as fs from 'fs';
-import * as path from 'path';
 import {
   Node,
   Edge,
@@ -25,6 +24,7 @@ import { GraphTraverser } from '../graph';
 import { VectorManager } from '../vectors';
 import { formatContextAsMarkdown, formatContextAsJson } from './formatter';
 import { logDebug, logWarn } from '../errors';
+import { validatePathWithinRoot } from '../utils';
 
 /**
  * Extract likely symbol names from a natural language query
@@ -438,9 +438,9 @@ export class ContextBuilder {
    * Extract code from a node's source file
    */
   private async extractNodeCode(node: Node): Promise<string | null> {
-    const filePath = path.join(this.projectRoot, node.filePath);
+    const filePath = validatePathWithinRoot(this.projectRoot, node.filePath);
 
-    if (!fs.existsSync(filePath)) {
+    if (!filePath || !fs.existsSync(filePath)) {
       return null;
     }
 

+ 7 - 6
src/db/queries.ts

@@ -17,6 +17,7 @@ import {
   SearchOptions,
   SearchResult,
 } from '../types';
+import { safeJsonParse } from '../utils';
 
 /**
  * Database row types (snake_case from SQLite)
@@ -97,8 +98,8 @@ function rowToNode(row: NodeRow): Node {
     isAsync: row.is_async === 1,
     isStatic: row.is_static === 1,
     isAbstract: row.is_abstract === 1,
-    decorators: row.decorators ? JSON.parse(row.decorators) : undefined,
-    typeParameters: row.type_parameters ? JSON.parse(row.type_parameters) : undefined,
+    decorators: row.decorators ? safeJsonParse(row.decorators, undefined) : undefined,
+    typeParameters: row.type_parameters ? safeJsonParse(row.type_parameters, undefined) : undefined,
     updatedAt: row.updated_at,
   };
 }
@@ -111,7 +112,7 @@ function rowToEdge(row: EdgeRow): Edge {
     source: row.source,
     target: row.target,
     kind: row.kind as EdgeKind,
-    metadata: row.metadata ? JSON.parse(row.metadata) : undefined,
+    metadata: row.metadata ? safeJsonParse(row.metadata, undefined) : undefined,
     line: row.line ?? undefined,
     column: row.col ?? undefined,
   };
@@ -129,7 +130,7 @@ function rowToFileRecord(row: FileRow): FileRecord {
     modifiedAt: row.modified_at,
     indexedAt: row.indexed_at,
     nodeCount: row.node_count,
-    errors: row.errors ? JSON.parse(row.errors) : undefined,
+    errors: row.errors ? safeJsonParse(row.errors, undefined) : undefined,
   };
 }
 
@@ -820,7 +821,7 @@ export class QueryBuilder {
       referenceKind: row.reference_kind as EdgeKind,
       line: row.line,
       column: row.col,
-      candidates: row.candidates ? JSON.parse(row.candidates) : undefined,
+      candidates: row.candidates ? safeJsonParse<string[]>(row.candidates, []) : undefined,
     }));
   }
 
@@ -835,7 +836,7 @@ export class QueryBuilder {
       referenceKind: row.reference_kind as EdgeKind,
       line: row.line,
       column: row.col,
-      candidates: row.candidates ? JSON.parse(row.candidates) : undefined,
+      candidates: row.candidates ? safeJsonParse<string[]>(row.candidates, []) : undefined,
     }));
   }
 

+ 59 - 1
src/extraction/index.ts

@@ -19,6 +19,7 @@ import { extractFromSource } from './tree-sitter';
 import { detectLanguage, isLanguageSupported } from './grammars';
 import { logDebug } from '../errors';
 import { captureException } from '../sentry';
+import { validatePathWithinRoot } from '../utils';
 
 /**
  * Progress callback for indexing operations
@@ -127,8 +128,22 @@ export function scanDirectory(
 ): string[] {
   const files: string[] = [];
   let count = 0;
+  const visitedRealPaths = new Set<string>(); // Symlink cycle detection
 
   function walk(dir: string): void {
+    // Symlink cycle detection: resolve real path and skip if already visited
+    try {
+      const realDir = fs.realpathSync(dir);
+      if (visitedRealPaths.has(realDir)) {
+        logDebug('Skipping directory to prevent symlink cycle', { dir, realDir });
+        return;
+      }
+      visitedRealPaths.add(realDir);
+    } catch {
+      // If realpath fails, skip this directory
+      return;
+    }
+
     // Check for .codegraphignore marker file - skip entire directory tree if present
     const ignoreMarker = path.join(dir, CODEGRAPH_IGNORE_MARKER);
     if (fs.existsSync(ignoreMarker)) {
@@ -149,6 +164,39 @@ export function scanDirectory(
       const fullPath = path.join(dir, entry.name);
       const relativePath = path.relative(rootDir, fullPath);
 
+      // Follow symlinked directories, but skip symlinked files to non-project targets
+      if (entry.isSymbolicLink()) {
+        try {
+          const realTarget = fs.realpathSync(fullPath);
+          const stat = fs.statSync(realTarget);
+          if (stat.isDirectory()) {
+            // Check exclusion, then recurse (cycle detection handles the rest)
+            const dirPattern = relativePath + '/';
+            let excluded = false;
+            for (const pattern of config.exclude) {
+              if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) {
+                excluded = true;
+                break;
+              }
+            }
+            if (!excluded) {
+              walk(fullPath);
+            }
+          } else if (stat.isFile()) {
+            if (shouldIncludeFile(relativePath, config)) {
+              files.push(relativePath);
+              count++;
+              if (onProgress) {
+                onProgress(count, relativePath);
+              }
+            }
+          }
+        } catch {
+          logDebug('Skipping broken symlink', { path: fullPath });
+        }
+        continue;
+      }
+
       if (entry.isDirectory()) {
         // Check if directory should be excluded
         const dirPattern = relativePath + '/';
@@ -335,7 +383,17 @@ export class ExtractionOrchestrator {
    * Index a single file
    */
   async indexFile(relativePath: string): Promise<ExtractionResult> {
-    const fullPath = path.join(this.rootDir, relativePath);
+    const fullPath = validatePathWithinRoot(this.rootDir, relativePath);
+
+    if (!fullPath) {
+      return {
+        nodes: [],
+        edges: [],
+        unresolvedReferences: [],
+        errors: [{ message: `Path traversal blocked: ${relativePath}`, severity: 'error' }],
+        durationMs: 0,
+      };
+    }
 
     // Check file exists and is readable
     let content: string;

+ 67 - 37
src/index.ts

@@ -47,7 +47,7 @@ import {
 import { GraphTraverser, GraphQueryManager } from './graph';
 import { VectorManager, createVectorManager, EmbeddingProgress } from './vectors';
 import { ContextBuilder, createContextBuilder } from './context';
-import { Mutex } from './utils';
+import { Mutex, FileLock } from './utils';
 
 // Re-export types for consumers
 export * from './types';
@@ -77,7 +77,7 @@ export {
   silentLogger,
   defaultLogger,
 } from './errors';
-export { Mutex, processInBatches, debounce, throttle, MemoryMonitor } from './utils';
+export { Mutex, FileLock, processInBatches, debounce, throttle, MemoryMonitor } from './utils';
 export { MCPServer } from './mcp';
 
 /**
@@ -133,9 +133,12 @@ export class CodeGraph {
   private vectorManager: VectorManager | null = null;
   private contextBuilder: ContextBuilder;
 
-  // Mutex for preventing concurrent indexing operations
+  // Mutex for preventing concurrent indexing operations (in-process)
   private indexMutex = new Mutex();
 
+  // File lock for preventing concurrent writes across processes (CLI, MCP, git hooks)
+  private fileLock: FileLock;
+
   private constructor(
     db: DatabaseConnection,
     queries: QueryBuilder,
@@ -146,6 +149,7 @@ export class CodeGraph {
     this.queries = queries;
     this.config = config;
     this.projectRoot = projectRoot;
+    this.fileLock = new FileLock(db.getPath());
     this.orchestrator = new ExtractionOrchestrator(projectRoot, config, queries);
     this.resolver = createResolver(projectRoot, queries);
     this.graphManager = new GraphQueryManager(queries);
@@ -317,6 +321,8 @@ export class CodeGraph {
    * Close the CodeGraph instance and release resources
    */
   close(): void {
+    // Release file lock if held
+    this.fileLock.release();
     // Dispose vector manager first to release ONNX workers
     if (this.vectorManager) {
       this.vectorManager.dispose();
@@ -369,29 +375,37 @@ export class CodeGraph {
    */
   async indexAll(options: IndexOptions = {}): Promise<IndexResult> {
     return this.indexMutex.withLock(async () => {
-      const result = await this.orchestrator.indexAll(options.onProgress, options.signal);
-
-      // Resolve references to create call/import/extends edges
-      if (result.success && result.filesIndexed > 0) {
-        // Get count of unresolved references for accurate progress
-        const unresolvedCount = this.queries.getUnresolvedReferences().length;
+      const locked = await this.fileLock.acquire();
+      if (!locked) {
+        return { success: false, filesIndexed: 0, filesSkipped: 0, nodesCreated: 0, edgesCreated: 0, errors: [{ message: 'Could not acquire file lock - another process may be indexing', severity: 'error' as const }], durationMs: 0 };
+      }
+      try {
+        const result = await this.orchestrator.indexAll(options.onProgress, options.signal);
 
-        options.onProgress?.({
-          phase: 'resolving',
-          current: 0,
-          total: unresolvedCount,
-        });
+        // Resolve references to create call/import/extends edges
+        if (result.success && result.filesIndexed > 0) {
+          // Get count of unresolved references for accurate progress
+          const unresolvedCount = this.queries.getUnresolvedReferences().length;
 
-        this.resolveReferences((current, total) => {
           options.onProgress?.({
             phase: 'resolving',
-            current,
-            total,
+            current: 0,
+            total: unresolvedCount,
           });
-        });
-      }
 
-      return result;
+          this.resolveReferences((current, total) => {
+            options.onProgress?.({
+              phase: 'resolving',
+              current,
+              total,
+            });
+          });
+        }
+
+        return result;
+      } finally {
+        this.fileLock.release();
+      }
     });
   }
 
@@ -402,7 +416,15 @@ export class CodeGraph {
    */
   async indexFiles(filePaths: string[]): Promise<IndexResult> {
     return this.indexMutex.withLock(async () => {
-      return this.orchestrator.indexFiles(filePaths);
+      const locked = await this.fileLock.acquire();
+      if (!locked) {
+        return { success: false, filesIndexed: 0, filesSkipped: 0, nodesCreated: 0, edgesCreated: 0, errors: [{ message: 'Could not acquire file lock - another process may be indexing', severity: 'error' as const }], durationMs: 0 };
+      }
+      try {
+        return this.orchestrator.indexFiles(filePaths);
+      } finally {
+        this.fileLock.release();
+      }
     });
   }
 
@@ -413,28 +435,36 @@ export class CodeGraph {
    */
   async sync(options: IndexOptions = {}): Promise<SyncResult> {
     return this.indexMutex.withLock(async () => {
-      const result = await this.orchestrator.sync(options.onProgress);
-
-      // Resolve references if files were updated
-      if (result.filesAdded > 0 || result.filesModified > 0) {
-        const unresolvedCount = this.queries.getUnresolvedReferences().length;
+      const locked = await this.fileLock.acquire();
+      if (!locked) {
+        return { filesChecked: 0, filesAdded: 0, filesModified: 0, filesRemoved: 0, nodesUpdated: 0, durationMs: 0 };
+      }
+      try {
+        const result = await this.orchestrator.sync(options.onProgress);
 
-        options.onProgress?.({
-          phase: 'resolving',
-          current: 0,
-          total: unresolvedCount,
-        });
+        // Resolve references if files were updated
+        if (result.filesAdded > 0 || result.filesModified > 0) {
+          const unresolvedCount = this.queries.getUnresolvedReferences().length;
 
-        this.resolveReferences((current, total) => {
           options.onProgress?.({
             phase: 'resolving',
-            current,
-            total,
+            current: 0,
+            total: unresolvedCount,
           });
-        });
-      }
 
-      return result;
+          this.resolveReferences((current, total) => {
+            options.onProgress?.({
+              phase: 'resolving',
+              current,
+              total,
+            });
+          });
+        }
+
+        return result;
+      } finally {
+        this.fileLock.release();
+      }
     });
   }
 

+ 6 - 5
src/mcp/tools.ts

@@ -8,6 +8,7 @@ import CodeGraph, { findNearestCodeGraphRoot } from '../index';
 import type { Node, SearchResult, Subgraph, TaskContext, NodeKind } from '../types';
 import { createHash } from 'crypto';
 import { writeFileSync } from 'fs';
+import { clamp } from '../utils';
 import { tmpdir } from 'os';
 import { join } from 'path';
 
@@ -351,7 +352,7 @@ export class ToolHandler {
     const cg = this.getCodeGraph(args.projectPath as string | undefined);
     const query = args.query as string;
     const kind = args.kind as string | undefined;
-    const limit = (args.limit as number) || 10;
+    const limit = clamp((args.limit as number) || 10, 1, 100);
 
     const results = cg.searchNodes(query, {
       limit,
@@ -436,7 +437,7 @@ export class ToolHandler {
   private async handleCallers(args: Record<string, unknown>): Promise<ToolResult> {
     const cg = this.getCodeGraph(args.projectPath as string | undefined);
     const symbol = args.symbol as string;
-    const limit = (args.limit as number) || 20;
+    const limit = clamp((args.limit as number) || 20, 1, 100);
 
     // First find the node by name
     const results = cg.searchNodes(symbol, { limit: 1 });
@@ -463,7 +464,7 @@ export class ToolHandler {
   private async handleCallees(args: Record<string, unknown>): Promise<ToolResult> {
     const cg = this.getCodeGraph(args.projectPath as string | undefined);
     const symbol = args.symbol as string;
-    const limit = (args.limit as number) || 20;
+    const limit = clamp((args.limit as number) || 20, 1, 100);
 
     // First find the node by name
     const results = cg.searchNodes(symbol, { limit: 1 });
@@ -490,7 +491,7 @@ export class ToolHandler {
   private async handleImpact(args: Record<string, unknown>): Promise<ToolResult> {
     const cg = this.getCodeGraph(args.projectPath as string | undefined);
     const symbol = args.symbol as string;
-    const depth = (args.depth as number) || 2;
+    const depth = clamp((args.depth as number) || 2, 1, 10);
 
     // First find the node by name
     const results = cg.searchNodes(symbol, { limit: 1 });
@@ -574,7 +575,7 @@ export class ToolHandler {
     const pattern = args.pattern as string | undefined;
     const format = (args.format as 'tree' | 'flat' | 'grouped') || 'tree';
     const includeMetadata = args.includeMetadata !== false;
-    const maxDepth = args.maxDepth as number | undefined;
+    const maxDepth = args.maxDepth != null ? clamp(args.maxDepth as number, 1, 20) : undefined;
 
     // Get all files from the index
     const allFiles = cg.getFiles();

+ 110 - 2
src/utils.ts

@@ -1,13 +1,14 @@
 /**
  * CodeGraph Utilities
  *
- * Common utility functions for memory management, concurrency, and batching.
+ * Common utility functions for memory management, concurrency, batching,
+ * and security validation.
  *
  * @module utils
  *
  * @example
  * ```typescript
- * import { Mutex, processInBatches, MemoryMonitor } from 'codegraph';
+ * import { Mutex, processInBatches, MemoryMonitor, validatePathWithinRoot } from 'codegraph';
  *
  * // Use mutex for concurrent safety
  * const mutex = new Mutex();
@@ -28,6 +29,113 @@
  * ```
  */
 
+import * as path from 'path';
+import * as fs from 'fs';
+
+// ============================================================
+// SECURITY UTILITIES
+// ============================================================
+
+/**
+ * Validate that a resolved file path stays within the project root.
+ * Prevents path traversal attacks (e.g. node.filePath = "../../etc/passwd").
+ *
+ * @param projectRoot - The project root directory
+ * @param filePath - The relative file path to validate
+ * @returns The resolved absolute path, or null if it escapes the root
+ */
+export function validatePathWithinRoot(projectRoot: string, filePath: string): string | null {
+  const resolved = path.resolve(projectRoot, filePath);
+  const normalizedRoot = path.resolve(projectRoot);
+
+  if (!resolved.startsWith(normalizedRoot + path.sep) && resolved !== normalizedRoot) {
+    return null;
+  }
+  return resolved;
+}
+
+/**
+ * Safely parse JSON with a fallback value.
+ * Prevents crashes from corrupted database metadata.
+ */
+export function safeJsonParse<T>(value: string, fallback: T): T {
+  try {
+    return JSON.parse(value);
+  } catch {
+    return fallback;
+  }
+}
+
+/**
+ * Clamp a numeric value to a range.
+ * Used to enforce sane limits on MCP tool inputs.
+ */
+export function clamp(value: number, min: number, max: number): number {
+  return Math.max(min, Math.min(max, value));
+}
+
+/**
+ * Cross-process file lock using lock files.
+ * Prevents concurrent database writes from CLI, MCP server, and git hooks.
+ */
+export class FileLock {
+  private lockPath: string;
+  private acquired = false;
+
+  constructor(resourcePath: string) {
+    this.lockPath = resourcePath + '.lock';
+  }
+
+  /**
+   * Acquire the file lock. Waits up to timeoutMs for the lock.
+   * Cleans up stale locks older than staleLockMs.
+   */
+  async acquire(timeoutMs: number = 10000, staleLockMs: number = 30000): Promise<boolean> {
+    const start = Date.now();
+
+    while (Date.now() - start < timeoutMs) {
+      try {
+        // Try to create lock file exclusively
+        fs.writeFileSync(this.lockPath, String(process.pid), { flag: 'wx' });
+        this.acquired = true;
+        return true;
+      } catch {
+        // Lock file exists - check if stale
+        try {
+          const stat = fs.statSync(this.lockPath);
+          if (Date.now() - stat.mtimeMs > staleLockMs) {
+            // Stale lock - remove and retry
+            fs.unlinkSync(this.lockPath);
+            continue;
+          }
+        } catch {
+          // Lock file disappeared between check and stat - retry
+          continue;
+        }
+
+        // Wait and retry
+        await new Promise(resolve => setTimeout(resolve, 100));
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Release the file lock
+   */
+  release(): void {
+    if (this.acquired) {
+      try {
+        fs.unlinkSync(this.lockPath);
+      } catch {
+        // Lock file already removed - that's fine
+      }
+      this.acquired = false;
+    }
+  }
+}
+
 /**
  * Process items in batches to manage memory
  *