directory.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. /**
  2. * Directory Management
  3. *
  4. * Manages the .codegraph/ directory structure for CodeGraph data.
  5. */
  6. import * as fs from 'fs';
  7. import * as os from 'os';
  8. import * as path from 'path';
  9. /** The default per-project data directory name. */
  10. const DEFAULT_CODEGRAPH_DIR = '.codegraph';
  11. let warnedBadDirName = false;
  12. /**
  13. * Resolve the per-project data directory name, honoring the `CODEGRAPH_DIR`
  14. * environment override (default `.codegraph`). The override is a single path
  15. * segment that lives in the project root.
  16. *
  17. * Why this exists: two environments that share one working tree must NOT share
  18. * one `.codegraph/` — most concretely Windows-native and WSL (issue #636). The
  19. * daemon lockfile (`.codegraph/daemon.pid`) records a platform-specific pid and
  20. * socket path (a Windows named pipe vs a WSL Unix socket), and SQLite file
  21. * locking across the WSL2 ↔ Windows filesystem boundary is unreliable, so two
  22. * daemons sharing one index risks corruption. Setting `CODEGRAPH_DIR=.codegraph-win`
  23. * on one side gives each environment its own index in the same tree.
  24. *
  25. * Read live (not captured at load) so it is both process-accurate and testable.
  26. * An override that isn't a plain directory name — empty, containing a path
  27. * separator, `.`, `..`/traversal, or absolute — is ignored (we keep the
  28. * default) rather than risk writing the index outside the project or into the
  29. * project root itself; we warn once to stderr so the misconfiguration is seen.
  30. */
  31. export function codeGraphDirName(): string {
  32. const raw = process.env.CODEGRAPH_DIR?.trim();
  33. if (!raw) return DEFAULT_CODEGRAPH_DIR;
  34. const invalid =
  35. raw === '.' ||
  36. raw.includes('..') ||
  37. raw.includes('/') ||
  38. raw.includes('\\') ||
  39. path.isAbsolute(raw);
  40. if (invalid) {
  41. if (!warnedBadDirName) {
  42. warnedBadDirName = true;
  43. // stderr only — stdout is the MCP protocol channel.
  44. console.warn(
  45. `[codegraph] Ignoring invalid CODEGRAPH_DIR="${raw}" — it must be a plain ` +
  46. `directory name (no path separators, no "..", not absolute). Using "${DEFAULT_CODEGRAPH_DIR}".`
  47. );
  48. }
  49. return DEFAULT_CODEGRAPH_DIR;
  50. }
  51. return raw;
  52. }
  53. /**
  54. * CodeGraph directory name — a load-time snapshot of {@link codeGraphDirName}.
  55. * A running process's environment is fixed, so this equals the live value;
  56. * it's kept as a stable string export for backward compatibility. Internal code
  57. * resolves the name through {@link codeGraphDirName} / {@link getCodeGraphDir}
  58. * so the `CODEGRAPH_DIR` override always applies.
  59. */
  60. export const CODEGRAPH_DIR = codeGraphDirName();
  61. /**
  62. * Is `name` (a single path segment) a CodeGraph data directory? Matches the
  63. * default `.codegraph`, the active `CODEGRAPH_DIR` override, and any
  64. * `.codegraph-*` sibling. File-watching and the indexer skip ALL of these, so
  65. * when two environments share one working tree (Windows + WSL, issue #636)
  66. * neither indexes or watches the other's index directory.
  67. */
  68. export function isCodeGraphDataDir(name: string): boolean {
  69. return (
  70. name === DEFAULT_CODEGRAPH_DIR ||
  71. name === codeGraphDirName() ||
  72. name.startsWith(DEFAULT_CODEGRAPH_DIR + '-')
  73. );
  74. }
  75. /**
  76. * Get the .codegraph directory path for a project
  77. */
  78. export function getCodeGraphDir(projectRoot: string): string {
  79. return path.join(projectRoot, codeGraphDirName());
  80. }
  81. /**
  82. * Check if a project has been initialized with CodeGraph
  83. * Requires both .codegraph/ directory AND codegraph.db to exist
  84. */
  85. export function isInitialized(projectRoot: string): boolean {
  86. const codegraphDir = getCodeGraphDir(projectRoot);
  87. if (!fs.existsSync(codegraphDir) || !fs.statSync(codegraphDir).isDirectory()) {
  88. return false;
  89. }
  90. // Must have codegraph.db, not just .codegraph folder
  91. const dbPath = path.join(codegraphDir, 'codegraph.db');
  92. return fs.existsSync(dbPath);
  93. }
  94. /**
  95. * Find the nearest parent directory containing .codegraph/
  96. *
  97. * Walks up from the given path to find a CodeGraph-initialized project,
  98. * similar to how git finds .git/ directories.
  99. *
  100. * @param startPath - Directory to start searching from
  101. * @returns The project root containing .codegraph/, or null if not found
  102. */
  103. /**
  104. * Reason a directory is unsafe to use as an index ROOT, or null when it's fine.
  105. *
  106. * Indexing your home directory or a filesystem root drags in caches, `Library`,
  107. * every other project, etc. — a multi-GB index, constant file-watcher churn, and
  108. * (pre-1.0 on macOS) a file-descriptor blowup that exhausted `kern.maxfiles` and
  109. * took unrelated apps / the whole machine down (#845). The classic trigger:
  110. * running the installer or `codegraph init` from `$HOME`, which auto-indexes the
  111. * current directory. These are never intended project roots, so the installer
  112. * and `init`/`index` refuse them (overridable with `--force`).
  113. *
  114. * Pure-ish (reads only `os.homedir()` + realpath) so it's easy to unit-test.
  115. * The returned string is a human phrase that slots into "… looks like {reason}".
  116. */
  117. export function unsafeIndexRootReason(projectRoot: string): string | null {
  118. const resolve = (p: string): string => {
  119. try {
  120. return fs.realpathSync(path.resolve(p));
  121. } catch {
  122. return path.resolve(p);
  123. }
  124. };
  125. const resolved = resolve(projectRoot);
  126. // Filesystem root: `/` on POSIX, a drive root like `C:\` on Windows.
  127. if (path.parse(resolved).root === resolved) {
  128. return 'the filesystem root';
  129. }
  130. const home = resolve(os.homedir());
  131. // Case-insensitive on macOS/Windows (case-preserving but case-insensitive FS).
  132. const norm = (p: string): string =>
  133. process.platform === 'darwin' || process.platform === 'win32' ? p.toLowerCase() : p;
  134. const r = norm(resolved);
  135. const h = norm(home);
  136. if (r === h) {
  137. return 'your home directory';
  138. }
  139. // An ancestor of home (e.g. `/Users`, `/home`) — even broader than home.
  140. if (h.startsWith(r + path.sep)) {
  141. return 'a parent of your home directory';
  142. }
  143. return null;
  144. }
  145. export function findNearestCodeGraphRoot(startPath: string): string | null {
  146. let current = path.resolve(startPath);
  147. const root = path.parse(current).root;
  148. while (current !== root) {
  149. if (isInitialized(current)) {
  150. return current;
  151. }
  152. const parent = path.dirname(current);
  153. if (parent === current) break; // Reached filesystem root
  154. current = parent;
  155. }
  156. // Check root as well
  157. if (isInitialized(current)) {
  158. return current;
  159. }
  160. return null;
  161. }
  162. /**
  163. * Contents of `.codegraph/.gitignore`. A single wildcard ignore keeps every
  164. * transient file in the index dir — the database, `daemon.pid`, the socket,
  165. * logs, cache, and anything future versions add — out of git, without having
  166. * to enumerate each name (issues #788, #492, #484). Older versions wrote an
  167. * explicit allowlist that never listed `daemon.pid` or the socket, so those
  168. * runtime files were silently committed.
  169. */
  170. const GITIGNORE_CONTENT = `# CodeGraph data files — local to each machine, not for committing.
  171. # Ignore everything in .codegraph/ except this file itself, so transient
  172. # files (the database, daemon.pid, sockets, logs) never show up in git.
  173. *
  174. !.gitignore
  175. `;
  176. /** Header line that prefixes every .gitignore CodeGraph has auto-generated. */
  177. const GITIGNORE_MARKER = '# CodeGraph data files';
  178. /**
  179. * Is `content` a stale CodeGraph-generated `.gitignore` that should be
  180. * regenerated in place? True when it carries our header but predates the
  181. * wildcard ignore (it has no bare `*` line) — i.e. one of the old explicit
  182. * allowlists (`*.db`, `cache/`, `.dirty`, …) that never ignored `daemon.pid`
  183. * or the socket (issue #788). A file WITHOUT our header is user-authored and
  184. * is left untouched; one that already has the wildcard is current. Matching
  185. * on the header (not a byte-exact list of past defaults) heals every old
  186. * variant — v0.7.x through 0.9.9 — and is idempotent once upgraded.
  187. */
  188. function isStaleDefaultGitignore(content: string): boolean {
  189. if (!content.trimStart().startsWith(GITIGNORE_MARKER)) return false;
  190. return !content.split('\n').some((line) => line.trim() === '*');
  191. }
  192. /**
  193. * Write `.codegraph/.gitignore` if it's absent, or upgrade a stale
  194. * CodeGraph-generated default in place; a user-customized file is left alone.
  195. * Best-effort — returns `false` only if a needed write failed.
  196. */
  197. function ensureGitignore(gitignorePath: string): boolean {
  198. let existing: string | null;
  199. try {
  200. existing = fs.readFileSync(gitignorePath, 'utf-8');
  201. } catch {
  202. existing = null; // absent (ENOENT) or unreadable — (re)create below
  203. }
  204. // Current default or a user-authored file: nothing to do.
  205. if (existing !== null && !isStaleDefaultGitignore(existing)) return true;
  206. try {
  207. fs.writeFileSync(gitignorePath, GITIGNORE_CONTENT, 'utf-8');
  208. return true;
  209. } catch {
  210. return false;
  211. }
  212. }
  213. /**
  214. * Create the .codegraph directory structure
  215. * Note: Only throws if codegraph.db already exists, not just if .codegraph/ exists.
  216. */
  217. export function createDirectory(projectRoot: string): void {
  218. const codegraphDir = getCodeGraphDir(projectRoot);
  219. const dbPath = path.join(codegraphDir, 'codegraph.db');
  220. // Only throw if CodeGraph is actually initialized (db exists)
  221. // .codegraph/ folder alone is fine
  222. if (fs.existsSync(dbPath)) {
  223. throw new Error(`CodeGraph already initialized in ${projectRoot}`);
  224. }
  225. // Create main directory (if it doesn't exist)
  226. fs.mkdirSync(codegraphDir, { recursive: true });
  227. // Write .gitignore inside .codegraph (create if absent, upgrade a stale
  228. // pre-wildcard default left by an older version — issue #788).
  229. ensureGitignore(path.join(codegraphDir, '.gitignore'));
  230. }
  231. /**
  232. * Remove the .codegraph directory
  233. */
  234. export function removeDirectory(projectRoot: string): void {
  235. const codegraphDir = getCodeGraphDir(projectRoot);
  236. if (!fs.existsSync(codegraphDir)) {
  237. return;
  238. }
  239. // Verify .codegraph is a real directory, not a symlink pointing elsewhere
  240. const lstat = fs.lstatSync(codegraphDir);
  241. if (lstat.isSymbolicLink()) {
  242. // Only remove the symlink itself, never follow it for recursive delete
  243. fs.unlinkSync(codegraphDir);
  244. return;
  245. }
  246. if (!lstat.isDirectory()) {
  247. // Not a directory - remove the single file
  248. fs.unlinkSync(codegraphDir);
  249. return;
  250. }
  251. // Recursively remove directory
  252. fs.rmSync(codegraphDir, { recursive: true, force: true });
  253. }
  254. /**
  255. * Get all files in the .codegraph directory
  256. */
  257. export function listDirectoryContents(projectRoot: string): string[] {
  258. const codegraphDir = getCodeGraphDir(projectRoot);
  259. if (!fs.existsSync(codegraphDir)) {
  260. return [];
  261. }
  262. const files: string[] = [];
  263. function walkDir(dir: string, prefix: string = ''): void {
  264. const entries = fs.readdirSync(dir, { withFileTypes: true });
  265. for (const entry of entries) {
  266. const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
  267. // Skip symlinks to prevent following links outside .codegraph
  268. if (entry.isSymbolicLink()) {
  269. continue;
  270. }
  271. if (entry.isDirectory()) {
  272. walkDir(path.join(dir, entry.name), relativePath);
  273. } else {
  274. files.push(relativePath);
  275. }
  276. }
  277. }
  278. walkDir(codegraphDir);
  279. return files;
  280. }
  281. /**
  282. * Get the total size of the .codegraph directory in bytes
  283. */
  284. export function getDirectorySize(projectRoot: string): number {
  285. const codegraphDir = getCodeGraphDir(projectRoot);
  286. if (!fs.existsSync(codegraphDir)) {
  287. return 0;
  288. }
  289. let totalSize = 0;
  290. function walkDir(dir: string): void {
  291. const entries = fs.readdirSync(dir, { withFileTypes: true });
  292. for (const entry of entries) {
  293. // Skip symlinks to prevent following links outside .codegraph
  294. if (entry.isSymbolicLink()) {
  295. continue;
  296. }
  297. const fullPath = path.join(dir, entry.name);
  298. if (entry.isDirectory()) {
  299. walkDir(fullPath);
  300. } else {
  301. const stats = fs.statSync(fullPath);
  302. totalSize += stats.size;
  303. }
  304. }
  305. }
  306. walkDir(codegraphDir);
  307. return totalSize;
  308. }
  309. /**
  310. * Ensure a subdirectory exists within .codegraph
  311. */
  312. export function ensureSubdirectory(projectRoot: string, subdirName: string): string {
  313. if (subdirName.includes('..') || subdirName.includes(path.sep) || subdirName.includes('/')) {
  314. throw new Error(`Invalid subdirectory name: ${subdirName}`);
  315. }
  316. const subdirPath = path.join(getCodeGraphDir(projectRoot), subdirName);
  317. if (!fs.existsSync(subdirPath)) {
  318. fs.mkdirSync(subdirPath, { recursive: true });
  319. }
  320. return subdirPath;
  321. }
  322. /**
  323. * Check if the .codegraph directory has valid structure
  324. */
  325. export function validateDirectory(projectRoot: string): {
  326. valid: boolean;
  327. errors: string[];
  328. } {
  329. const errors: string[] = [];
  330. const codegraphDir = getCodeGraphDir(projectRoot);
  331. if (!fs.existsSync(codegraphDir)) {
  332. errors.push('CodeGraph directory does not exist');
  333. return { valid: false, errors };
  334. }
  335. if (!fs.statSync(codegraphDir).isDirectory()) {
  336. errors.push('.codegraph exists but is not a directory');
  337. return { valid: false, errors };
  338. }
  339. // Auto-repair / upgrade .gitignore (non-critical file). A missing one is
  340. // recreated; a stale pre-wildcard default that never ignored daemon.pid is
  341. // regenerated in place (issue #788); a user-authored file is left alone.
  342. const gitignorePath = path.join(codegraphDir, '.gitignore');
  343. const existedBefore = fs.existsSync(gitignorePath);
  344. if (!ensureGitignore(gitignorePath) && !existedBefore) {
  345. // Only a missing-and-uncreatable file is surfaced; a failed in-place
  346. // upgrade of an existing file is non-fatal — the index still works.
  347. errors.push('.gitignore missing in .codegraph directory and could not be created');
  348. }
  349. return {
  350. valid: errors.length === 0,
  351. errors,
  352. };
  353. }