Sfoglia il codice sorgente

feat: Always enable embeddings, remove enableEmbeddings config option

Testing showed semantic search produces significantly better results for
natural language queries that Claude writes. FTS alone often ranks
properties above their parent classes and misses conceptual matches.
Embeddings are now always on — the vector manager is created eagerly,
with model download and embedding generation still happening lazily.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Colby McHenry 2 mesi fa
parent
commit
7e6914ddff
6 file modificati con 10 aggiunte e 29 eliminazioni
  1. 1 3
      README.md
  2. 3 2
      __tests__/vectors.test.ts
  3. 0 2
      src/config.ts
  4. 3 6
      src/index.ts
  5. 0 4
      src/types.ts
  6. 3 12
      src/visualizer/server.ts

+ 1 - 3
README.md

@@ -645,8 +645,7 @@ The `.codegraph/config.json` file controls indexing behavior:
   "frameworks": [],
   "maxFileSize": 1048576,
   "extractDocstrings": true,
-  "trackCallSites": true,
-  "enableEmbeddings": false
+  "trackCallSites": true
 }
 ```
 
@@ -660,7 +659,6 @@ The `.codegraph/config.json` file controls indexing behavior:
 | `maxFileSize` | Skip files larger than this (bytes) | `1048576` (1MB) |
 | `extractDocstrings` | Whether to extract docstrings from code | `true` |
 | `trackCallSites` | Whether to track call site locations | `true` |
-| `enableEmbeddings` | Enable semantic search embeddings | `false` |
 
 ## 🌐 Supported Languages
 

+ 3 - 2
__tests__/vectors.test.ts

@@ -286,9 +286,10 @@ export function processData(input: string): string {
       expect(cg.isEmbeddingsInitialized()).toBe(false);
     });
 
-    it('should return null embedding stats when not initialized', () => {
+    it('should return embedding stats even before initialization', () => {
       const stats = cg.getEmbeddingStats();
-      expect(stats).toBeNull();
+      expect(stats).not.toBeNull();
+      expect(stats!.totalVectors).toBe(0);
     });
 
     it('should throw when calling semanticSearch without initialization', async () => {

+ 0 - 2
src/config.ts

@@ -67,7 +67,6 @@ export function validateConfig(config: unknown): config is CodeGraphConfig {
   if (typeof c.maxFileSize !== 'number') return false;
   if (typeof c.extractDocstrings !== 'boolean') return false;
   if (typeof c.trackCallSites !== 'boolean') return false;
-  if (typeof c.enableEmbeddings !== 'boolean') return false;
 
   // Validate include/exclude are string arrays
   if (!c.include.every((p) => typeof p === 'string')) return false;
@@ -128,7 +127,6 @@ function mergeConfig(
     maxFileSize: overrides.maxFileSize ?? defaults.maxFileSize,
     extractDocstrings: overrides.extractDocstrings ?? defaults.extractDocstrings,
     trackCallSites: overrides.trackCallSites ?? defaults.trackCallSites,
-    enableEmbeddings: overrides.enableEmbeddings ?? defaults.enableEmbeddings,
     customPatterns: overrides.customPatterns ?? defaults.customPatterns,
   };
 }

+ 3 - 6
src/index.ts

@@ -157,12 +157,9 @@ export class CodeGraph {
     this.resolver = createResolver(projectRoot, queries);
     this.graphManager = new GraphQueryManager(queries);
     this.traverser = new GraphTraverser(queries);
-    // Vector manager is created lazily when embeddings are enabled
-    // Uses global ~/.codegraph/models directory for shared embedding models
-    if (config.enableEmbeddings) {
-      this.vectorManager = createVectorManager(db.getDb(), queries, {});
-    }
-    // Context builder (uses vector manager if available)
+    // Vector manager — always created, embeddings generated lazily on first use
+    this.vectorManager = createVectorManager(db.getDb(), queries, {});
+    // Context builder (uses vector manager for semantic search)
     this.contextBuilder = createContextBuilder(
       projectRoot,
       queries,

+ 0 - 4
src/types.ts

@@ -462,9 +462,6 @@ export interface CodeGraphConfig {
   /** Whether to track call sites */
   trackCallSites: boolean;
 
-  /** Whether to compute embeddings for semantic search */
-  enableEmbeddings: boolean;
-
   /** Custom symbol patterns to extract */
   customPatterns?: {
     /** Name for this pattern group */
@@ -675,7 +672,6 @@ export const DEFAULT_CONFIG: CodeGraphConfig = {
   maxFileSize: 1024 * 1024, // 1MB
   extractDocstrings: true,
   trackCallSites: true,
-  enableEmbeddings: false,
 };
 
 // =============================================================================

+ 3 - 12
src/visualizer/server.ts

@@ -236,16 +236,14 @@ ${symbolIndex}`;
 
       // GET /api/embeddings/status
       if (pathname === '/api/embeddings/status') {
-        const config = this.cg.getConfig();
         const embeddingStats = this.cg.getEmbeddingStats();
-        const isEnabled = config.enableEmbeddings === true;
         const isInitialized = this.cg.isEmbeddingsInitialized();
         const totalVectors = embeddingStats?.totalVectors ?? 0;
         const stats = this.cg.getStats();
         // Consider ready if we have vectors for at least half the eligible nodes
         const eligibleNodes = stats.nodeCount - (stats.nodesByKind.file ?? 0) - (stats.nodesByKind.import ?? 0);
-        const isReady = isEnabled && totalVectors > 0 && totalVectors >= eligibleNodes * 0.5;
-        json({ isEnabled, isInitialized, isReady, totalVectors, eligibleNodes });
+        const isReady = totalVectors > 0 && totalVectors >= eligibleNodes * 0.5;
+        json({ isEnabled: true, isInitialized, isReady, totalVectors, eligibleNodes });
         return;
       }
 
@@ -262,14 +260,7 @@ ${symbolIndex}`;
         };
 
         try {
-          // Step 1: Enable embeddings in config
-          send('status', { phase: 'config', message: 'Enabling embeddings...' });
-          const config = this.cg.getConfig();
-          if (!config.enableEmbeddings) {
-            this.cg.updateConfig({ enableEmbeddings: true });
-          }
-
-          // Step 2: Initialize embedding model (downloads on first use)
+          // Step 1: Initialize embedding model (downloads on first use)
           send('status', { phase: 'model', message: 'Loading embedding model (first time may download ~30MB)...' });
           await this.cg.initializeEmbeddings();
           send('status', { phase: 'model', message: 'Embedding model ready' });