Bläddra i källkod

fix(directory): self-heal a stale .codegraph/.gitignore so daemon.pid is ignored (#788) (#802)

Versions <= 0.9.9 wrote a .codegraph/.gitignore that listed explicit ignore
patterns (*.db, cache/, .dirty, ...) but never daemon.pid or the socket,
so the daemon's runtime pidfile got committed. The wildcard rewrite in
#654/#492/#484 fixed new inits, but the file is only written when
absent, so existing installs kept their stale file forever — the fix
never reached the people hitting it.

Make the gitignore self-heal: ensureGitignore() writes the file if
absent and upgrades a stale CodeGraph-generated default in place,
leaving a user-authored file untouched. A "stale default" is one that
carries our `# CodeGraph data files` header but predates the wildcard
ignore (no bare `*` line) — a header match heals every historical
variant (v0.7.x..0.9.9, all verified to share it) and is idempotent.
validateDirectory() runs on every open()/openSync(), so existing repos
heal on the next codegraph command after upgrading. The duplicated
template (previously inlined in two formats) is consolidated into one
GITIGNORE_CONTENT constant.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Colby Mchenry 1 vecka sedan
förälder
incheckning
9a0f144770
3 ändrade filer med 107 tillägg och 21 borttagningar
  1. 1 0
      CHANGELOG.md
  2. 40 0
      __tests__/foundation.test.ts
  3. 66 21
      src/directory.ts

+ 1 - 0
CHANGELOG.md

@@ -93,6 +93,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Indexing a very large repository no longer aborts during its first sync with a "too many SQL variables" error. (#540)
 - Files under directories with non-ASCII names (for example CJK characters) are no longer silently skipped during indexing. (#541)
 - The `.codegraph/` index folder no longer clutters `git status`: its generated ignore file now excludes everything in the folder except itself, so the database, `daemon.pid`, sockets, and logs stop showing up as untracked changes. (#492, #484)
+- Projects initialized by an older version now get that fix automatically: a `.codegraph/.gitignore` written before this change — which listed only the database, cache, and logs and so let the daemon's `daemon.pid` get committed — is upgraded in place the next time you run any CodeGraph command. A `.gitignore` you've customized yourself is left untouched. (#788)
 - SAP HANA `.xsjs` / `.xsjslib` files are now indexed as JavaScript. (#556)
 - TypeScript `.mts` and `.cts` module files are now indexed instead of being skipped. (#366)
 - JavaScript modules that wrap their code in an anonymous function — AMD/RequireJS, NetSuite SuiteScript, IIFE bundles — now have their inner functions and calls indexed, instead of the file coming up nearly empty. (#528)

+ 40 - 0
__tests__/foundation.test.ts

@@ -159,6 +159,46 @@ describe('CodeGraph Foundation', () => {
       expect(validation.valid).toBe(false);
       expect(validation.errors.length).toBeGreaterThan(0);
     });
+
+    it('upgrades a stale pre-wildcard .gitignore in place (issue #788)', () => {
+      const cg = CodeGraph.initSync(tempDir);
+      cg.close();
+
+      const gitignorePath = path.join(getCodeGraphDir(tempDir), '.gitignore');
+      // A .gitignore as written by an older version (<= 0.9.9): it starts with
+      // the CodeGraph header but lists explicit ignore patterns without
+      // daemon.pid, so the daemon's runtime pidfile got committed.
+      const staleV099 =
+        '# CodeGraph data files\n' +
+        '# These are local to each machine and should not be committed\n\n' +
+        '# Database\n*.db\n*.db-wal\n*.db-shm\n\n' +
+        '# Cache\ncache/\n\n# Logs\n*.log\n\n# Hook markers\n.dirty\n';
+      fs.writeFileSync(gitignorePath, staleV099, 'utf-8');
+
+      // Opening the project runs validateDirectory, which rewrites a stale default.
+      const cg2 = CodeGraph.openSync(tempDir);
+      cg2.close();
+
+      const upgraded = fs.readFileSync(gitignorePath, 'utf-8');
+      expect(upgraded).toContain('\n*\n'); // a bare wildcard line ignores everything…
+      expect(upgraded).toContain('!.gitignore'); // …except this file
+      expect(upgraded).not.toContain('.dirty'); // old explicit pattern list is gone
+    });
+
+    it('leaves a user-customized .codegraph/.gitignore untouched', () => {
+      const cg = CodeGraph.initSync(tempDir);
+      cg.close();
+
+      const gitignorePath = path.join(getCodeGraphDir(tempDir), '.gitignore');
+      // Does not start with the CodeGraph header, so it counts as user-authored and must not be rewritten.
+      const custom = '# my own rules\n*.db\n!keep-this.json\n';
+      fs.writeFileSync(gitignorePath, custom, 'utf-8');
+
+      const cg2 = CodeGraph.openSync(tempDir);
+      cg2.close();
+
+      expect(fs.readFileSync(gitignorePath, 'utf-8')).toBe(custom); // byte-for-byte unchanged
+    });
   });
 
   describe('Uninitialize', () => {

+ 66 - 21
src/directory.ts

@@ -129,6 +129,61 @@ export function findNearestCodeGraphRoot(startPath: string): string | null {
   return null;
 }
 
+/**
+ * Contents of `.codegraph/.gitignore`. A single wildcard ignore keeps every
+ * transient file in the index dir — the database, `daemon.pid`, the socket,
+ * logs, cache, and anything future versions add — out of git, without having
+ * to enumerate each name (issues #788, #492, #484). Older versions wrote an
+ * explicit list of ignore patterns that never included `daemon.pid` or the
+ * socket, so those runtime files were silently committed.
+ *
+ * Two properties of this text are load-bearing for `isStaleDefaultGitignore`:
+ * the first line starts with `GITIGNORE_MARKER`, and one line is a bare `*`.
+ * Keeping the header while dropping the `*` line would make the generated
+ * file look stale again, so it would be rewritten on every check.
+ */
+const GITIGNORE_CONTENT = `# CodeGraph data files — local to each machine, not for committing.
+# Ignore everything in .codegraph/ except this file itself, so transient
+# files (the database, daemon.pid, sockets, logs) never show up in git.
+*
+!.gitignore
+`;
+
+/**
+ * Header text that every .gitignore CodeGraph has auto-generated starts with.
+ * Matched as a prefix, so it covers both a bare header line and the longer
+ * first line of `GITIGNORE_CONTENT`.
+ */
+const GITIGNORE_MARKER = '# CodeGraph data files';
+
+/**
+ * Is `content` a stale CodeGraph-generated `.gitignore` that should be
+ * regenerated in place? True when it carries our header but predates the
+ * wildcard ignore (it has no bare `*` line) — i.e. one of the old explicit
+ * pattern lists (`*.db`, `cache/`, `.dirty`, …) that never ignored `daemon.pid`
+ * or the socket (issue #788). A file WITHOUT our header is user-authored and
+ * is left untouched; one that already has the wildcard is current. Matching
+ * on the header (not a byte-exact list of past defaults) heals every old
+ * variant — v0.7.x through 0.9.9 — and is idempotent once upgraded.
+ *
+ * The check is purely textual: leading whitespace before the header is
+ * skipped, and each line is trimmed before being compared with `*`, so
+ * indentation and CRLF line endings do not hide the wildcard. A file that
+ * starts with the header and has no bare `*` line is reported stale even if
+ * its other lines were hand-edited.
+ *
+ * @param content - Full text of an existing `.codegraph/.gitignore`.
+ * @returns `true` when `content` starts with the header and has no bare `*` line.
+ */
+function isStaleDefaultGitignore(content: string): boolean {
+  if (!content.trimStart().startsWith(GITIGNORE_MARKER)) return false; // no header → not ours
+  return !content.split('\n').some((line) => line.trim() === '*'); // no bare `*` → pre-wildcard
+}
+
+/**
+ * Write `.codegraph/.gitignore` if it's absent, or upgrade a stale
+ * CodeGraph-generated default in place; a user-customized file is left alone.
+ * Best-effort — both filesystem calls are guarded, and the function returns
+ * `false` only if a needed write failed.
+ *
+ * NOTE(review): any read failure — not just ENOENT — is treated as "absent",
+ * so an existing file that cannot be read is overwritten if the write
+ * succeeds, whatever its contents.
+ *
+ * @param gitignorePath - Path of the `.gitignore` file to create or upgrade.
+ * @returns `true` if the file was already acceptable or was written; `false`
+ *   if writing it failed.
+ */
+function ensureGitignore(gitignorePath: string): boolean {
+  let existing: string | null;
+  try {
+    existing = fs.readFileSync(gitignorePath, 'utf-8');
+  } catch {
+    existing = null; // absent (ENOENT) or unreadable — both fall through to the write below
+  }
+  // Current default or a user-authored file: nothing to do.
+  if (existing !== null && !isStaleDefaultGitignore(existing)) return true;
+  try {
+    fs.writeFileSync(gitignorePath, GITIGNORE_CONTENT, 'utf-8');
+    return true;
+  } catch {
+    return false;
+  }
+}
+
 /**
  * Create the .codegraph directory structure
  * Note: Only throws if codegraph.db already exists, not just if .codegraph/ exists.
@@ -146,18 +201,9 @@ export function createDirectory(projectRoot: string): void {
   // Create main directory (if it doesn't exist)
   fs.mkdirSync(codegraphDir, { recursive: true });
 
-  // Create .gitignore inside .codegraph (if it doesn't exist)
-  const gitignorePath = path.join(codegraphDir, '.gitignore');
-  if (!fs.existsSync(gitignorePath)) {
-    const gitignoreContent = `# CodeGraph data files — local to each machine, not for committing.
-# Ignore everything in .codegraph/ except this file itself, so transient
-# files (the database, daemon.pid, sockets, logs) never show up in git.
-*
-!.gitignore
-`;
-
-    fs.writeFileSync(gitignorePath, gitignoreContent, 'utf-8');
-  }
+  // Write .gitignore inside .codegraph (create if absent, upgrade a stale
+  // pre-wildcard default left by an older version — issue #788).
+  ensureGitignore(path.join(codegraphDir, '.gitignore'));
 }
 
 /**
@@ -296,16 +342,15 @@ export function validateDirectory(projectRoot: string): {
     return { valid: false, errors };
   }
 
-  // Auto-repair missing .gitignore (non-critical file)
+  // Auto-repair / upgrade .gitignore (non-critical file). A missing one is
+  // recreated; a stale pre-wildcard default that never ignored daemon.pid is
+  // regenerated in place (issue #788); a user-authored file is left alone.
   const gitignorePath = path.join(codegraphDir, '.gitignore');
-  if (!fs.existsSync(gitignorePath)) {
-    try {
-      const gitignoreContent = `# CodeGraph data files — local to each machine, not for committing.\n# Ignore everything in .codegraph/ except this file itself, so transient\n# files (the database, daemon.pid, sockets, logs) never show up in git.\n*\n!.gitignore\n`;
-      fs.writeFileSync(gitignorePath, gitignoreContent, 'utf-8');
-    } catch {
-      // Non-fatal: warn but don't block
-      errors.push('.gitignore missing in .codegraph directory and could not be created');
-    }
+  const existedBefore = fs.existsSync(gitignorePath);
+  if (!ensureGitignore(gitignorePath) && !existedBefore) {
+    // Only a missing-and-uncreatable file is surfaced; a failed in-place
+    // upgrade of an existing file is non-fatal — the index still works.
+    errors.push('.gitignore missing in .codegraph directory and could not be created');
   }
 
   return {