Explorar o código

fix(sync): stop reporting git-untracked files as pending after sync (#206) (#218)

Both git fast-paths in ExtractionOrchestrator (sync and getChangedFiles)
classified every untracked (`??`) file as "added" without checking the
CodeGraph index. Indexing a file doesn't make git track it, so the file
stayed `??` and was re-reported as pending and re-indexed on every run:
`codegraph status` listed it under Pending Changes forever and each
`sync` re-added it, even though its symbols were already queryable.

Merge the modified + added handling into a single loop that hash-compares
each file against the CodeGraph index, so untracked files get the same
treatment as tracked ones: "added" only if missing from the CodeGraph
index, "modified" if contents changed, skipped otherwise.
The non-git fallback path already did this and is unchanged.

Closes #206. Reported by @15290391025.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Colby Mchenry hai 1 mes
pai
achega
a47355780b
Modificáronse 3 ficheiros con 67 adicións e 16 borrados
  1. 12 0
      CHANGELOG.md
  2. 44 0
      __tests__/sync.test.ts
  3. 11 16
      src/extraction/index.ts

+ 12 - 0
CHANGELOG.md

@@ -63,6 +63,18 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
   Thanks to [@essopsp](https://github.com/essopsp) for the repro.
   Thanks to [@essopsp](https://github.com/essopsp) for the repro.
 
 
 ### Fixed
 ### Fixed
+- **Sync / status**: git-untracked files are no longer reported as pending
+  "Added" forever. After `codegraph sync` indexed a newly-created untracked
+  source file, `codegraph status` kept listing it under Pending Changes and
+  every subsequent `sync` re-indexed it from scratch — even though its symbols
+  were already queryable. Change detection trusted `git status` and counted
+  every untracked (`??`) entry as new without checking the index, but indexing
+  a file doesn't make git track it, so the file stayed `??` and got re-added on
+  each run. CodeGraph now hash-compares untracked files against the index the
+  same way it does tracked files: a file counts as "added" only if it's missing
+  from the index, "modified" if its contents changed, and is skipped otherwise.
+  Closes [#206](https://github.com/colbymchenry/codegraph/issues/206). Thanks to
+  [@15290391025](https://github.com/15290391025) for the report.
 - **Indexing**: `codegraph init -i` now finds source inside nested, independent
 - **Indexing**: `codegraph init -i` now finds source inside nested, independent
   git repositories — separate clones living inside the workspace that are **not**
   git repositories — separate clones living inside the workspace that are **not**
   git submodules (common in CMake "super-repo" layouts). When the top-level
   git submodules (common in CMake "super-repo" layouts). When the top-level

+ 44 - 0
__tests__/sync.test.ts

@@ -225,6 +225,50 @@ describe('Sync Module', () => {
       expect(nodes.length).toBeGreaterThan(0);
       expect(nodes.length).toBeGreaterThan(0);
     });
     });
 
 
+    it('should stop reporting untracked files once they are indexed (issue #206)', async () => {
+      // Untracked files stay `??` in git status even after codegraph indexes
+      // them. Change detection must compare them against the DB by hash, not
+      // report every untracked file as "added" on every sync/status.
+      fs.writeFileSync(
+        path.join(testDir, 'src', 'new.ts'),
+        `export function newFunc() { return 42; }`
+      );
+
+      // First sync indexes the untracked file.
+      const first = await cg.sync();
+      expect(first.filesAdded).toBe(1);
+
+      // The file is still untracked in git, but now lives in the DB.
+      expect(cg.searchNodes('newFunc').length).toBeGreaterThan(0);
+
+      // status must not keep flagging it as a pending addition...
+      const changes = cg.getChangedFiles();
+      expect(changes.added).not.toContain('src/new.ts');
+      expect(changes.modified).not.toContain('src/new.ts');
+
+      // ...and a second sync must be a no-op for it.
+      const second = await cg.sync();
+      expect(second.filesAdded).toBe(0);
+      expect(second.filesModified).toBe(0);
+    });
+
+    it('should re-index an untracked file when its contents change', async () => {
+      const filePath = path.join(testDir, 'src', 'new.ts');
+      fs.writeFileSync(filePath, `export function newFunc() { return 42; }`);
+      await cg.sync();
+
+      // Modify the still-untracked file.
+      fs.writeFileSync(filePath, `export function renamedFunc() { return 7; }`);
+
+      const changes = cg.getChangedFiles();
+      expect(changes.modified).toContain('src/new.ts');
+
+      const result = await cg.sync();
+      expect(result.filesModified).toBe(1);
+      expect(cg.searchNodes('renamedFunc').length).toBeGreaterThan(0);
+      expect(cg.searchNodes('newFunc').length).toBe(0);
+    });
+
     it('should detect deleted files via git', async () => {
     it('should detect deleted files via git', async () => {
       fs.unlinkSync(path.join(testDir, 'src', 'index.ts'));
       fs.unlinkSync(path.join(testDir, 'src', 'index.ts'));
 
 

+ 11 - 16
src/extraction/index.ts

@@ -1261,8 +1261,12 @@ export class ExtractionOrchestrator {
         }
         }
       }
       }
 
 
-      // Handle modified files — read + hash only these files
-      for (const filePath of gitChanges.modified) {
+      // Handle modified + added files — read + hash only these. Untracked
+      // (`??`) files stay untracked in git even after we index them, so they
+      // can't be trusted as "new": re-hash and compare against the DB exactly
+      // like modified files. Otherwise every sync re-indexes them and status
+      // reports them as pending forever. (See issue #206.)
+      for (const filePath of [...gitChanges.modified, ...gitChanges.added]) {
         const fullPath = path.join(this.rootDir, filePath);
         const fullPath = path.join(this.rootDir, filePath);
         let content: string;
         let content: string;
         try {
         try {
@@ -1285,13 +1289,6 @@ export class ExtractionOrchestrator {
           filesModified++;
           filesModified++;
         }
         }
       }
       }
-
-      // Handle added (untracked) files
-      for (const filePath of gitChanges.added) {
-        filesToIndex.push(filePath);
-        changedFilePaths.push(filePath);
-        filesAdded++;
-      }
     } else {
     } else {
       // === Fallback: full scan (non-git project or git failure) ===
       // === Fallback: full scan (non-git project or git failure) ===
       const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
       const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
@@ -1395,8 +1392,11 @@ export class ExtractionOrchestrator {
         }
         }
       }
       }
 
 
-      // Modified files — read + hash only these, compare with DB
-      for (const filePath of gitChanges.modified) {
+      // Modified + added files — read + hash, compare with DB. Untracked (`??`)
+      // files stay untracked in git even after indexing, so they must be
+      // hash-compared like modified files instead of always counting as added —
+      // otherwise status reports them as pending forever. (See issue #206.)
+      for (const filePath of [...gitChanges.modified, ...gitChanges.added]) {
         const fullPath = path.join(this.rootDir, filePath);
         const fullPath = path.join(this.rootDir, filePath);
         let content: string;
         let content: string;
         try {
         try {
@@ -1416,11 +1416,6 @@ export class ExtractionOrchestrator {
         }
         }
       }
       }
 
 
-      // Added (untracked) files
-      for (const filePath of gitChanges.added) {
-        added.push(filePath);
-      }
-
       return { added, modified, removed };
       return { added, modified, removed };
     }
     }