| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407 |
- /**
- * Directory Management
- *
- * Manages the .codegraph/ directory structure for CodeGraph data.
- */
- import * as fs from 'fs';
- import * as os from 'os';
- import * as path from 'path';
/** Directory name used for per-project data when no valid override is set. */
const DEFAULT_CODEGRAPH_DIR = '.codegraph';

/** Latch: set once the invalid-override warning has been printed. */
let warnedBadDirName = false;

/**
 * Name of the per-project data directory: the `CODEGRAPH_DIR` environment
 * variable when it holds a usable value, otherwise `.codegraph`. The value is
 * a single path segment that lives directly in the project root.
 *
 * Motivation (issue #636): two environments sharing one working tree — most
 * concretely Windows-native and WSL — must NOT share one `.codegraph/`. The
 * daemon lockfile (`.codegraph/daemon.pid`) records a platform-specific pid
 * and socket path (Windows named pipe vs WSL Unix socket), and SQLite file
 * locking across the WSL2 ↔ Windows filesystem boundary is unreliable, so two
 * daemons on one index risk corruption. Setting e.g.
 * `CODEGRAPH_DIR=.codegraph-win` on one side gives each environment its own
 * index in the same tree.
 *
 * The environment is read on every call (not captured at load), which keeps
 * the result process-accurate and testable.
 *
 * An override is rejected — and the default returned — when, after trimming,
 * it is `.`, contains `..`, contains `/` or `\`, or is an absolute path. An
 * empty/whitespace-only value is treated as "not set". Rejection avoids
 * writing the index outside the project or into the project root itself. The
 * first rejection in a process prints one warning via `console.warn`.
 *
 * @returns The directory name to use (never a multi-segment path).
 */
export function codeGraphDirName(): string {
  const override = process.env.CODEGRAPH_DIR?.trim();
  if (override === undefined || override === '') {
    return DEFAULT_CODEGRAPH_DIR;
  }

  const isPlainName =
    override !== '.' &&
    !override.includes('..') &&
    !override.includes('/') &&
    !override.includes('\\') &&
    !path.isAbsolute(override);
  if (isPlainName) {
    return override;
  }

  if (!warnedBadDirName) {
    warnedBadDirName = true;
    const message =
      `[codegraph] Ignoring invalid CODEGRAPH_DIR="${override}" — it must be a plain ` +
      `directory name (no path separators, no "..", not absolute). Using "${DEFAULT_CODEGRAPH_DIR}".`;
    // stderr only — stdout is the MCP protocol channel.
    console.warn(message);
  }
  return DEFAULT_CODEGRAPH_DIR;
}
/**
 * CodeGraph directory name, captured once when this module is loaded by
 * calling {@link codeGraphDirName}. It is a snapshot: it does not follow
 * later changes to `process.env.CODEGRAPH_DIR`.
 *
 * Kept as a stable string export for backward compatibility. Code in this
 * module resolves the name through {@link codeGraphDirName} /
 * {@link getCodeGraphDir} instead, so the `CODEGRAPH_DIR` override is re-read
 * on every call.
 */
export const CODEGRAPH_DIR: string = codeGraphDirName();
/**
 * Tell whether `name` (a single path segment) names a CodeGraph data
 * directory. Three things match:
 *
 *  - the default `.codegraph`,
 *  - the currently active `CODEGRAPH_DIR` override, and
 *  - any `.codegraph-*` sibling.
 *
 * File-watching and the indexer skip ALL of these, so when two environments
 * share one working tree (Windows + WSL, issue #636) neither indexes or
 * watches the other's index directory.
 *
 * @param name - A bare directory name, not a path.
 * @returns `true` when the name belongs to CodeGraph.
 */
export function isCodeGraphDataDir(name: string): boolean {
  if (name === DEFAULT_CODEGRAPH_DIR) {
    return true;
  }
  if (name === codeGraphDirName()) {
    return true;
  }
  return name.startsWith(`${DEFAULT_CODEGRAPH_DIR}-`);
}
/**
 * Build the path of the CodeGraph data directory for a project: the project
 * root joined with the name returned by {@link codeGraphDirName}.
 *
 * @param projectRoot - Root directory of the project.
 * @returns Path to the project's data directory (not checked for existence).
 */
export function getCodeGraphDir(projectRoot: string): string {
  const dirName = codeGraphDirName();
  return path.join(projectRoot, dirName);
}
/**
 * Report whether a project has been initialized with CodeGraph.
 *
 * Two things must both be true: the data directory exists and is a directory,
 * and it contains `codegraph.db`. A bare data directory without the database
 * does not count.
 *
 * @param projectRoot - Root directory of the project to inspect.
 * @returns `true` only when the directory and its database file are present.
 */
export function isInitialized(projectRoot: string): boolean {
  const dataDir = getCodeGraphDir(projectRoot);
  const hasDataDir = fs.existsSync(dataDir) && fs.statSync(dataDir).isDirectory();
  if (!hasDataDir) {
    return false;
  }
  // The folder alone is not enough — the database file must be there too.
  return fs.existsSync(path.join(dataDir, 'codegraph.db'));
}
/*
 * Documentation for findNearestCodeGraphRoot(), which is defined further
 * down in this file (after unsafeIndexRootReason()):
 *
 * Find the nearest parent directory containing .codegraph/
 *
 * Walks up from the given path to find a CodeGraph-initialized project,
 * similar to how git finds .git/ directories.
 *
 * @param startPath - Directory to start searching from
 * @returns The project root containing .codegraph/, or null if not found
 */
/**
 * Explain why a directory must not be used as an index ROOT, or return `null`
 * when it is acceptable.
 *
 * Indexing a home directory or a filesystem root drags in caches, `Library`,
 * every other project, etc. — a multi-GB index, constant file-watcher churn,
 * and (pre-1.0 on macOS) a file-descriptor blowup that exhausted
 * `kern.maxfiles` and took unrelated apps / the whole machine down (#845).
 * The classic trigger is running the installer or `codegraph init` from
 * `$HOME`, which auto-indexes the current directory. These are never intended
 * project roots, so the installer and `init`/`index` refuse them
 * (overridable with `--force`).
 *
 * Nearly pure: the only outside state consulted is `os.homedir()` and the
 * filesystem via `realpathSync`, which keeps it easy to unit-test.
 *
 * @param projectRoot - Candidate index root (relative paths are resolved).
 * @returns A human phrase that slots into "… looks like {reason}", or `null`.
 */
export function unsafeIndexRootReason(projectRoot: string): string | null {
  // Absolute + symlink-resolved form; falls back to the merely absolute form
  // when the path cannot be resolved on disk.
  const canonical = (input: string): string => {
    const absolute = path.resolve(input);
    try {
      return fs.realpathSync(absolute);
    } catch {
      return absolute;
    }
  };

  const target = canonical(projectRoot);

  // Filesystem root: `/` on POSIX, a drive root like `C:\` on Windows.
  if (target === path.parse(target).root) {
    return 'the filesystem root';
  }

  // macOS/Windows filesystems are case-preserving but case-insensitive, so
  // compare lower-cased paths there.
  const foldsCase = process.platform === 'darwin' || process.platform === 'win32';
  const comparable = (p: string): string => (foldsCase ? p.toLowerCase() : p);

  const targetKey = comparable(target);
  const homeKey = comparable(canonical(os.homedir()));

  if (targetKey === homeKey) {
    return 'your home directory';
  }

  // An ancestor of home (e.g. `/Users`, `/home`) — even broader than home.
  const isAncestorOfHome = homeKey.startsWith(targetKey + path.sep);
  return isAncestorOfHome ? 'a parent of your home directory' : null;
}
/**
 * Locate the closest CodeGraph-initialized project at or above a path.
 *
 * Starting from `startPath` (made absolute), each directory is tested with
 * {@link isInitialized}; on a miss the search moves to the parent directory.
 * The filesystem root is the last candidate tested. This mirrors how git
 * walks upward looking for `.git/`.
 *
 * @param startPath - Directory to start searching from.
 * @returns The first initialized directory found, or `null` if none is.
 */
export function findNearestCodeGraphRoot(startPath: string): string | null {
  const start = path.resolve(startPath);
  const fsRoot = path.parse(start).root;

  let candidate = start;
  for (;;) {
    if (isInitialized(candidate)) {
      return candidate;
    }
    if (candidate === fsRoot) {
      return null; // The root itself was just checked — nowhere left to go.
    }
    const parent = path.dirname(candidate);
    if (parent === candidate) {
      return null; // dirname() made no progress: treat as the top.
    }
    candidate = parent;
  }
}
/**
 * Text written to `.codegraph/.gitignore`.
 *
 * One wildcard rule (`*`, with `!.gitignore` re-including this file) keeps
 * every transient file in the index dir out of git — the database,
 * `daemon.pid`, the socket, logs, cache, and anything future versions add —
 * without enumerating names (issues #788, #492, #484). Older versions wrote
 * an explicit allowlist that never listed `daemon.pid` or the socket, so
 * those runtime files were silently committed.
 *
 * Lines are joined with `\n`; the trailing empty entry yields the final
 * newline.
 */
const GITIGNORE_CONTENT = [
  '# CodeGraph data files — local to each machine, not for committing.',
  '# Ignore everything in .codegraph/ except this file itself, so transient',
  '# files (the database, daemon.pid, sockets, logs) never show up in git.',
  '*',
  '!.gitignore',
  '',
].join('\n');
/** Header text that begins every `.gitignore` CodeGraph has auto-generated. */
const GITIGNORE_MARKER = '# CodeGraph data files';

/**
 * Decide whether `content` is an outdated CodeGraph-generated `.gitignore`
 * that should be regenerated in place.
 *
 * It is stale when it starts (ignoring leading whitespace) with our header
 * but contains no bare `*` line — i.e. one of the old explicit allowlists
 * (`*.db`, `cache/`, `.dirty`, …) that never ignored `daemon.pid` or the
 * socket (issue #788). Content WITHOUT our header is user-authored and is
 * never considered stale; content that already has the wildcard is current.
 *
 * Matching on the header rather than on byte-exact past defaults heals every
 * old variant — v0.7.x through 0.9.9 — and is idempotent once upgraded.
 *
 * @param content - Full text of an existing `.gitignore`.
 * @returns `true` when the file is ours and predates the wildcard rule.
 */
function isStaleDefaultGitignore(content: string): boolean {
  const isGeneratedByUs = content.trimStart().startsWith(GITIGNORE_MARKER);
  if (!isGeneratedByUs) {
    return false;
  }
  for (const line of content.split('\n')) {
    if (line.trim() === '*') {
      return false; // Wildcard rule present — already the current format.
    }
  }
  return true;
}
/**
 * Make sure `.codegraph/.gitignore` is in place and current.
 *
 * - No readable file at the path: write the default.
 * - A stale CodeGraph-generated default: overwrite it with the current one.
 * - Anything else (current default or user-customized): leave it untouched.
 *
 * Best-effort: errors are not thrown.
 *
 * @param gitignorePath - Full path of the `.gitignore` file to manage.
 * @returns `false` only if a needed write failed; `true` otherwise.
 */
function ensureGitignore(gitignorePath: string): boolean {
  // null means absent (ENOENT) or unreadable — in both cases we (re)create.
  const readCurrent = (): string | null => {
    try {
      return fs.readFileSync(gitignorePath, 'utf-8');
    } catch {
      return null;
    }
  };

  const current = readCurrent();
  const needsWrite = current === null || isStaleDefaultGitignore(current);
  if (!needsWrite) {
    return true;
  }

  try {
    fs.writeFileSync(gitignorePath, GITIGNORE_CONTENT, 'utf-8');
  } catch {
    return false;
  }
  return true;
}
/**
 * Create the CodeGraph data directory for a project and seed its
 * `.gitignore`.
 *
 * An existing data directory is fine and is reused; only an existing
 * `codegraph.db` inside it counts as "already initialized".
 *
 * @param projectRoot - Root directory of the project.
 * @throws Error when `codegraph.db` already exists in the data directory.
 */
export function createDirectory(projectRoot: string): void {
  const dataDir = getCodeGraphDir(projectRoot);

  // The database file, not the folder, is what marks an initialized project.
  const alreadyInitialized = fs.existsSync(path.join(dataDir, 'codegraph.db'));
  if (alreadyInitialized) {
    throw new Error(`CodeGraph already initialized in ${projectRoot}`);
  }

  // No-op when the directory is already there.
  fs.mkdirSync(dataDir, { recursive: true });

  // Create the .gitignore if absent, or upgrade a stale pre-wildcard default
  // left by an older version (issue #788). The result is deliberately
  // ignored: a failed write does not abort directory creation.
  const gitignorePath = path.join(dataDir, '.gitignore');
  ensureGitignore(gitignorePath);
}
/**
 * Remove a project's CodeGraph data directory.
 *
 * What gets removed depends on what is actually at the path:
 *  - nothing there (or the path cannot be inspected): no-op;
 *  - a symbolic link — including a dangling one: only the link is unlinked,
 *    its target is never followed or deleted;
 *  - some other non-directory entry: that single entry is unlinked;
 *  - a real directory: it is deleted recursively.
 *
 * The path is probed with `lstatSync` rather than `existsSync`, because
 * `existsSync` follows symlinks and reports a dangling link as missing, which
 * would leave the broken link behind.
 *
 * @param projectRoot - Root directory of the project.
 * @throws Whatever `unlinkSync` / `rmSync` throw if the removal itself fails.
 */
export function removeDirectory(projectRoot: string): void {
  const codegraphDir = getCodeGraphDir(projectRoot);

  let entry: fs.Stats;
  try {
    // lstat inspects the entry itself and does not follow a symlink.
    entry = fs.lstatSync(codegraphDir);
  } catch {
    // Nothing at the path, or it cannot be inspected. Treated as "nothing to
    // remove", as the previous existsSync() guard did for any error.
    return;
  }

  if (entry.isSymbolicLink()) {
    // Only remove the symlink itself, never follow it for recursive delete.
    fs.unlinkSync(codegraphDir);
    return;
  }

  if (!entry.isDirectory()) {
    // Not a directory - remove the single file.
    fs.unlinkSync(codegraphDir);
    return;
  }

  fs.rmSync(codegraphDir, { recursive: true, force: true });
}
/**
 * List every file inside a project's CodeGraph data directory.
 *
 * The directory is walked depth-first in `readdirSync` order. Symbolic links
 * (to files or directories) are skipped so the walk never leaves the data
 * directory. Directories themselves are not listed, only the files in them.
 *
 * @param projectRoot - Root directory of the project.
 * @returns Paths relative to the data directory, with `/` between segments;
 *          an empty array when the data directory does not exist.
 */
export function listDirectoryContents(projectRoot: string): string[] {
  const codegraphDir = getCodeGraphDir(projectRoot);
  if (!fs.existsSync(codegraphDir)) {
    return [];
  }

  // Returns the relative paths of all files under `dir`; `prefix` is the
  // relative path of `dir` itself ('' for the top level).
  const collect = (dir: string, prefix: string): string[] =>
    fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
      // Skip symlinks to prevent following links outside .codegraph
      if (entry.isSymbolicLink()) {
        return [];
      }
      const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
      return entry.isDirectory()
        ? collect(path.join(dir, entry.name), relativePath)
        : [relativePath];
    });

  return collect(codegraphDir, '');
}
/**
 * Total size, in bytes, of the files inside a project's CodeGraph data
 * directory.
 *
 * Every non-symlink, non-directory entry contributes its `stat` size;
 * subdirectories are descended into. Symbolic links are skipped so nothing
 * outside the data directory is counted.
 *
 * @param projectRoot - Root directory of the project.
 * @returns Sum of file sizes in bytes; `0` when the directory does not exist.
 */
export function getDirectorySize(projectRoot: string): number {
  const codegraphDir = getCodeGraphDir(projectRoot);
  if (!fs.existsSync(codegraphDir)) {
    return 0;
  }

  // Bytes used by all files at or below `dir`.
  const sizeOf = (dir: string): number => {
    let bytes = 0;
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      // Skip symlinks to prevent following links outside .codegraph
      if (entry.isSymbolicLink()) {
        continue;
      }
      const entryPath = path.join(dir, entry.name);
      bytes += entry.isDirectory() ? sizeOf(entryPath) : fs.statSync(entryPath).size;
    }
    return bytes;
  };

  return sizeOf(codegraphDir);
}
/**
 * Make sure a named subdirectory exists inside the CodeGraph data directory
 * and return its path.
 *
 * The name is rejected when it contains `..`, the platform path separator, or
 * `/`, so the result cannot point outside the data directory.
 *
 * @param projectRoot - Root directory of the project.
 * @param subdirName - Name of the subdirectory (a single path segment).
 * @returns Path of the subdirectory.
 * @throws Error when `subdirName` contains a forbidden sequence.
 */
export function ensureSubdirectory(projectRoot: string, subdirName: string): string {
  const forbiddenParts = ['..', path.sep, '/'];
  const isUnsafe = forbiddenParts.some((part) => subdirName.includes(part));
  if (isUnsafe) {
    throw new Error(`Invalid subdirectory name: ${subdirName}`);
  }

  const target = path.join(getCodeGraphDir(projectRoot), subdirName);
  const alreadyThere = fs.existsSync(target);
  if (!alreadyThere) {
    fs.mkdirSync(target, { recursive: true });
  }
  return target;
}
/**
 * Check that a project's CodeGraph data directory has a valid structure,
 * repairing its `.gitignore` along the way.
 *
 * Checks, in order:
 *  1. the data directory exists;
 *  2. it is a directory;
 *  3. its `.gitignore` is present — a missing one is recreated, a stale
 *     pre-wildcard default that never ignored `daemon.pid` is regenerated in
 *     place (issue #788), and a user-authored file is left alone.
 *
 * Failures 1 and 2 end validation immediately. For step 3 only a
 * missing-and-uncreatable `.gitignore` is reported; a failed in-place upgrade
 * of an existing file is non-fatal because the index still works.
 *
 * @param projectRoot - Root directory of the project.
 * @returns `valid` is `true` exactly when `errors` is empty.
 */
export function validateDirectory(projectRoot: string): {
  valid: boolean;
  errors: string[];
} {
  const failWith = (message: string): { valid: boolean; errors: string[] } => ({
    valid: false,
    errors: [message],
  });

  const dataDir = getCodeGraphDir(projectRoot);
  if (!fs.existsSync(dataDir)) {
    return failWith('CodeGraph directory does not exist');
  }
  if (!fs.statSync(dataDir).isDirectory()) {
    return failWith('.codegraph exists but is not a directory');
  }

  // Record presence BEFORE the repair attempt so a failed upgrade of an
  // existing file can be told apart from a file that could not be created.
  const gitignorePath = path.join(dataDir, '.gitignore');
  const hadGitignore = fs.existsSync(gitignorePath);
  const gitignoreOk = ensureGitignore(gitignorePath);

  const errors: string[] = [];
  if (!gitignoreOk && !hadGitignore) {
    errors.push('.gitignore missing in .codegraph directory and could not be created');
  }

  return { valid: errors.length === 0, errors };
}
|