directory.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. /**
  2. * Directory Management
  3. *
  4. * Manages the .codegraph/ directory structure for CodeGraph data.
  5. */
  6. import * as fs from 'fs';
  7. import * as path from 'path';
  8. /** The default per-project data directory name. */
  9. const DEFAULT_CODEGRAPH_DIR = '.codegraph';
  10. let warnedBadDirName = false;
  11. /**
  12. * Resolve the per-project data directory name, honoring the `CODEGRAPH_DIR`
  13. * environment override (default `.codegraph`). The override is a single path
  14. * segment that lives in the project root.
  15. *
  16. * Why this exists: two environments that share one working tree must NOT share
  17. * one `.codegraph/` — most concretely Windows-native and WSL (issue #636). The
  18. * daemon lockfile (`.codegraph/daemon.pid`) records a platform-specific pid and
  19. * socket path (a Windows named pipe vs a WSL Unix socket), and SQLite file
  20. * locking across the WSL2 ↔ Windows filesystem boundary is unreliable, so two
  21. * daemons sharing one index risks corruption. Setting `CODEGRAPH_DIR=.codegraph-win`
  22. * on one side gives each environment its own index in the same tree.
  23. *
  24. * Read live (not captured at load) so it is both process-accurate and testable.
  25. * An override that isn't a plain directory name — empty, containing a path
  26. * separator, `.`, `..`/traversal, or absolute — is ignored (we keep the
  27. * default) rather than risk writing the index outside the project or into the
  28. * project root itself; we warn once to stderr so the misconfiguration is seen.
  29. */
  30. export function codeGraphDirName(): string {
  31. const raw = process.env.CODEGRAPH_DIR?.trim();
  32. if (!raw) return DEFAULT_CODEGRAPH_DIR;
  33. const invalid =
  34. raw === '.' ||
  35. raw.includes('..') ||
  36. raw.includes('/') ||
  37. raw.includes('\\') ||
  38. path.isAbsolute(raw);
  39. if (invalid) {
  40. if (!warnedBadDirName) {
  41. warnedBadDirName = true;
  42. // stderr only — stdout is the MCP protocol channel.
  43. console.warn(
  44. `[codegraph] Ignoring invalid CODEGRAPH_DIR="${raw}" — it must be a plain ` +
  45. `directory name (no path separators, no "..", not absolute). Using "${DEFAULT_CODEGRAPH_DIR}".`
  46. );
  47. }
  48. return DEFAULT_CODEGRAPH_DIR;
  49. }
  50. return raw;
  51. }
/**
 * CodeGraph directory name — a snapshot of {@link codeGraphDirName} taken once,
 * when this module is first loaded. It is kept as a stable string export for
 * backward compatibility.
 *
 * Because it is evaluated a single time, it does NOT follow later in-process
 * changes to `process.env.CODEGRAPH_DIR` (for example, a test that sets the
 * variable after import); {@link codeGraphDirName} re-reads the variable on
 * every call. Internal code should resolve the name through
 * {@link codeGraphDirName} / {@link getCodeGraphDir} so the `CODEGRAPH_DIR`
 * override always applies.
 *
 * Note: evaluating this at import time calls {@link codeGraphDirName}, so an
 * invalid override present at load triggers its one-time warning here.
 */
export const CODEGRAPH_DIR = codeGraphDirName();
  60. /**
  61. * Is `name` (a single path segment) a CodeGraph data directory? Matches the
  62. * default `.codegraph`, the active `CODEGRAPH_DIR` override, and any
  63. * `.codegraph-*` sibling. File-watching and the indexer skip ALL of these, so
  64. * when two environments share one working tree (Windows + WSL, issue #636)
  65. * neither indexes or watches the other's index directory.
  66. */
  67. export function isCodeGraphDataDir(name: string): boolean {
  68. return (
  69. name === DEFAULT_CODEGRAPH_DIR ||
  70. name === codeGraphDirName() ||
  71. name.startsWith(DEFAULT_CODEGRAPH_DIR + '-')
  72. );
  73. }
  74. /**
  75. * Get the .codegraph directory path for a project
  76. */
  77. export function getCodeGraphDir(projectRoot: string): string {
  78. return path.join(projectRoot, codeGraphDirName());
  79. }
  80. /**
  81. * Check if a project has been initialized with CodeGraph
  82. * Requires both .codegraph/ directory AND codegraph.db to exist
  83. */
  84. export function isInitialized(projectRoot: string): boolean {
  85. const codegraphDir = getCodeGraphDir(projectRoot);
  86. if (!fs.existsSync(codegraphDir) || !fs.statSync(codegraphDir).isDirectory()) {
  87. return false;
  88. }
  89. // Must have codegraph.db, not just .codegraph folder
  90. const dbPath = path.join(codegraphDir, 'codegraph.db');
  91. return fs.existsSync(dbPath);
  92. }
  93. /**
  94. * Find the nearest parent directory containing .codegraph/
  95. *
  96. * Walks up from the given path to find a CodeGraph-initialized project,
  97. * similar to how git finds .git/ directories.
  98. *
  99. * @param startPath - Directory to start searching from
  100. * @returns The project root containing .codegraph/, or null if not found
  101. */
  102. export function findNearestCodeGraphRoot(startPath: string): string | null {
  103. let current = path.resolve(startPath);
  104. const root = path.parse(current).root;
  105. while (current !== root) {
  106. if (isInitialized(current)) {
  107. return current;
  108. }
  109. const parent = path.dirname(current);
  110. if (parent === current) break; // Reached filesystem root
  111. current = parent;
  112. }
  113. // Check root as well
  114. if (isInitialized(current)) {
  115. return current;
  116. }
  117. return null;
  118. }
/**
 * Contents of `.codegraph/.gitignore`. A single wildcard ignore keeps every
 * transient file in the index dir — the database, `daemon.pid`, the socket,
 * logs, cache, and anything future versions add — out of git, without having
 * to enumerate each name (issues #788, #492, #484). Older versions wrote an
 * explicit allowlist that never listed `daemon.pid` or the socket, so those
 * runtime files were silently committed.
 *
 * The text must keep starting with `GITIGNORE_MARKER` and keep its bare `*`
 * line: `isStaleDefaultGitignore` relies on both to tell a current generated
 * file from a stale one.
 */
const GITIGNORE_CONTENT = `# CodeGraph data files — local to each machine, not for committing.
# Ignore everything in .codegraph/ except this file itself, so transient
# files (the database, daemon.pid, sockets, logs) never show up in git.
*
!.gitignore
`;
  133. /** Header line that prefixes every .gitignore CodeGraph has auto-generated. */
  134. const GITIGNORE_MARKER = '# CodeGraph data files';
  135. /**
  136. * Is `content` a stale CodeGraph-generated `.gitignore` that should be
  137. * regenerated in place? True when it carries our header but predates the
  138. * wildcard ignore (it has no bare `*` line) — i.e. one of the old explicit
  139. * allowlists (`*.db`, `cache/`, `.dirty`, …) that never ignored `daemon.pid`
  140. * or the socket (issue #788). A file WITHOUT our header is user-authored and
  141. * is left untouched; one that already has the wildcard is current. Matching
  142. * on the header (not a byte-exact list of past defaults) heals every old
  143. * variant — v0.7.x through 0.9.9 — and is idempotent once upgraded.
  144. */
  145. function isStaleDefaultGitignore(content: string): boolean {
  146. if (!content.trimStart().startsWith(GITIGNORE_MARKER)) return false;
  147. return !content.split('\n').some((line) => line.trim() === '*');
  148. }
  149. /**
  150. * Write `.codegraph/.gitignore` if it's absent, or upgrade a stale
  151. * CodeGraph-generated default in place; a user-customized file is left alone.
  152. * Best-effort — returns `false` only if a needed write failed.
  153. */
  154. function ensureGitignore(gitignorePath: string): boolean {
  155. let existing: string | null;
  156. try {
  157. existing = fs.readFileSync(gitignorePath, 'utf-8');
  158. } catch {
  159. existing = null; // absent (ENOENT) or unreadable — (re)create below
  160. }
  161. // Current default or a user-authored file: nothing to do.
  162. if (existing !== null && !isStaleDefaultGitignore(existing)) return true;
  163. try {
  164. fs.writeFileSync(gitignorePath, GITIGNORE_CONTENT, 'utf-8');
  165. return true;
  166. } catch {
  167. return false;
  168. }
  169. }
  170. /**
  171. * Create the .codegraph directory structure
  172. * Note: Only throws if codegraph.db already exists, not just if .codegraph/ exists.
  173. */
  174. export function createDirectory(projectRoot: string): void {
  175. const codegraphDir = getCodeGraphDir(projectRoot);
  176. const dbPath = path.join(codegraphDir, 'codegraph.db');
  177. // Only throw if CodeGraph is actually initialized (db exists)
  178. // .codegraph/ folder alone is fine
  179. if (fs.existsSync(dbPath)) {
  180. throw new Error(`CodeGraph already initialized in ${projectRoot}`);
  181. }
  182. // Create main directory (if it doesn't exist)
  183. fs.mkdirSync(codegraphDir, { recursive: true });
  184. // Write .gitignore inside .codegraph (create if absent, upgrade a stale
  185. // pre-wildcard default left by an older version — issue #788).
  186. ensureGitignore(path.join(codegraphDir, '.gitignore'));
  187. }
  188. /**
  189. * Remove the .codegraph directory
  190. */
  191. export function removeDirectory(projectRoot: string): void {
  192. const codegraphDir = getCodeGraphDir(projectRoot);
  193. if (!fs.existsSync(codegraphDir)) {
  194. return;
  195. }
  196. // Verify .codegraph is a real directory, not a symlink pointing elsewhere
  197. const lstat = fs.lstatSync(codegraphDir);
  198. if (lstat.isSymbolicLink()) {
  199. // Only remove the symlink itself, never follow it for recursive delete
  200. fs.unlinkSync(codegraphDir);
  201. return;
  202. }
  203. if (!lstat.isDirectory()) {
  204. // Not a directory - remove the single file
  205. fs.unlinkSync(codegraphDir);
  206. return;
  207. }
  208. // Recursively remove directory
  209. fs.rmSync(codegraphDir, { recursive: true, force: true });
  210. }
  211. /**
  212. * Get all files in the .codegraph directory
  213. */
  214. export function listDirectoryContents(projectRoot: string): string[] {
  215. const codegraphDir = getCodeGraphDir(projectRoot);
  216. if (!fs.existsSync(codegraphDir)) {
  217. return [];
  218. }
  219. const files: string[] = [];
  220. function walkDir(dir: string, prefix: string = ''): void {
  221. const entries = fs.readdirSync(dir, { withFileTypes: true });
  222. for (const entry of entries) {
  223. const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
  224. // Skip symlinks to prevent following links outside .codegraph
  225. if (entry.isSymbolicLink()) {
  226. continue;
  227. }
  228. if (entry.isDirectory()) {
  229. walkDir(path.join(dir, entry.name), relativePath);
  230. } else {
  231. files.push(relativePath);
  232. }
  233. }
  234. }
  235. walkDir(codegraphDir);
  236. return files;
  237. }
  238. /**
  239. * Get the total size of the .codegraph directory in bytes
  240. */
  241. export function getDirectorySize(projectRoot: string): number {
  242. const codegraphDir = getCodeGraphDir(projectRoot);
  243. if (!fs.existsSync(codegraphDir)) {
  244. return 0;
  245. }
  246. let totalSize = 0;
  247. function walkDir(dir: string): void {
  248. const entries = fs.readdirSync(dir, { withFileTypes: true });
  249. for (const entry of entries) {
  250. // Skip symlinks to prevent following links outside .codegraph
  251. if (entry.isSymbolicLink()) {
  252. continue;
  253. }
  254. const fullPath = path.join(dir, entry.name);
  255. if (entry.isDirectory()) {
  256. walkDir(fullPath);
  257. } else {
  258. const stats = fs.statSync(fullPath);
  259. totalSize += stats.size;
  260. }
  261. }
  262. }
  263. walkDir(codegraphDir);
  264. return totalSize;
  265. }
  266. /**
  267. * Ensure a subdirectory exists within .codegraph
  268. */
  269. export function ensureSubdirectory(projectRoot: string, subdirName: string): string {
  270. if (subdirName.includes('..') || subdirName.includes(path.sep) || subdirName.includes('/')) {
  271. throw new Error(`Invalid subdirectory name: ${subdirName}`);
  272. }
  273. const subdirPath = path.join(getCodeGraphDir(projectRoot), subdirName);
  274. if (!fs.existsSync(subdirPath)) {
  275. fs.mkdirSync(subdirPath, { recursive: true });
  276. }
  277. return subdirPath;
  278. }
  279. /**
  280. * Check if the .codegraph directory has valid structure
  281. */
  282. export function validateDirectory(projectRoot: string): {
  283. valid: boolean;
  284. errors: string[];
  285. } {
  286. const errors: string[] = [];
  287. const codegraphDir = getCodeGraphDir(projectRoot);
  288. if (!fs.existsSync(codegraphDir)) {
  289. errors.push('CodeGraph directory does not exist');
  290. return { valid: false, errors };
  291. }
  292. if (!fs.statSync(codegraphDir).isDirectory()) {
  293. errors.push('.codegraph exists but is not a directory');
  294. return { valid: false, errors };
  295. }
  296. // Auto-repair / upgrade .gitignore (non-critical file). A missing one is
  297. // recreated; a stale pre-wildcard default that never ignored daemon.pid is
  298. // regenerated in place (issue #788); a user-authored file is left alone.
  299. const gitignorePath = path.join(codegraphDir, '.gitignore');
  300. const existedBefore = fs.existsSync(gitignorePath);
  301. if (!ensureGitignore(gitignorePath) && !existedBefore) {
  302. // Only a missing-and-uncreatable file is surfaced; a failed in-place
  303. // upgrade of an existing file is non-fatal — the index still works.
  304. errors.push('.gitignore missing in .codegraph directory and could not be created');
  305. }
  306. return {
  307. valid: errors.length === 0,
  308. errors,
  309. };
  310. }