Преглед изворног кода

Download embedding model to ~/.codegraph/models on install

The nomic-ai model is now downloaded during npm install via a postinstall
script and stored globally in ~/.codegraph/models (shared across projects)
instead of per-project in .codegraph/models.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Colby McHenry пре 5 месеци
родитељ
комит
3ad4d5b35d
4 измењених фајлова са 75 додато и 2 уклоњено
  1. 2 0
      package.json
  2. 68 0
      scripts/postinstall.js
  3. 0 1
      src/index.ts
  4. 5 1
      src/vectors/embedder.ts

+ 2 - 0
package.json

@@ -9,10 +9,12 @@
   },
   "files": [
     "dist",
+    "scripts",
     "README.md"
   ],
   "scripts": {
     "build": "tsc && npm run copy-assets",
+    "postinstall": "node scripts/postinstall.js",
     "copy-assets": "cp -r src/extraction/queries dist/extraction/ && cp src/db/schema.sql dist/db/",
     "dev": "tsc --watch",
     "cli": "npm run build && node dist/bin/codegraph.js",

+ 68 - 0
scripts/postinstall.js

@@ -0,0 +1,68 @@
+#!/usr/bin/env node
+/**
+ * Postinstall script - downloads the embedding model to ~/.codegraph/models
+ * This runs after `npm install` or `npx @colbymchenry/codegraph`
+ */
+const { existsSync, mkdirSync } = require('fs');
+const { join } = require('path');
+const { homedir } = require('os');
+
+const CODEGRAPH_DIR = join(homedir(), '.codegraph'); // per-user root directory under the home folder
+const MODELS_DIR = join(CODEGRAPH_DIR, 'models'); // model cache directory, assigned to env.cacheDir below
+const MODEL_ID = 'nomic-ai/nomic-embed-text-v1.5'; // model identifier passed to pipeline()
+
+async function downloadModel() {
+  // Ensure directories exist
+  if (!existsSync(CODEGRAPH_DIR)) {
+    mkdirSync(CODEGRAPH_DIR, { recursive: true });
+  }
+  if (!existsSync(MODELS_DIR)) {
+    mkdirSync(MODELS_DIR, { recursive: true });
+  }
+
+  // Check if model is already cached
+  const modelCachePath = join(MODELS_DIR, MODEL_ID.replace('/', '/'));
+  if (existsSync(modelCachePath)) {
+    console.log('Embedding model already downloaded.');
+    return;
+  }
+
+  console.log('Downloading embedding model (~130MB)...');
+  console.log('This is a one-time download for semantic code search.\n');
+
+  try {
+    // Dynamic import for @xenova/transformers (ESM-only package)
+    const { pipeline, env } = await import('@xenova/transformers');
+
+    // Configure cache directory
+    env.cacheDir = MODELS_DIR;
+
+    // Download with progress
+    await pipeline('feature-extraction', MODEL_ID, {
+      progress_callback: (progress) => {
+        if (progress.status === 'progress' && progress.file && progress.progress !== undefined) {
+          const fileName = progress.file.split('/').pop();
+          const percent = Math.round(progress.progress);
+          process.stdout.write(`\rDownloading ${fileName}... ${percent}%   `);
+        } else if (progress.status === 'done') {
+          process.stdout.write('\n');
+        }
+      },
+    });
+
+    console.log('\nEmbedding model ready!');
+  } catch (error) {
+    // Don't fail the install if model download fails
+    // User can still use codegraph without semantic search
+    console.log('\nNote: Could not download embedding model.');
+    console.log('Semantic search will download it on first use.');
+    if (process.env.DEBUG) {
+      console.error(error);
+    }
+  }
+}
+
+downloadModel().catch(() => {
+  // Last-resort guard: exit 0 on any unexpected rejection so `npm install` is never broken
+  process.exit(0);
+});

+ 0 - 1
src/index.ts

@@ -760,7 +760,6 @@ export class CodeGraph {
     if (!this.vectorManager) {
       this.vectorManager = createVectorManager(this.db.getDb(), this.queries, {
         embedder: {
-          cacheDir: path.join(this.projectRoot, '.codegraph', 'models'),
           showProgress: true,
         },
       });

+ 5 - 1
src/vectors/embedder.ts

@@ -7,6 +7,10 @@
 
 import * as path from 'path';
 import * as fs from 'fs';
+import { homedir } from 'os';
+
+// Global model cache directory (shared across all projects)
+const GLOBAL_MODELS_DIR = path.join(homedir(), '.codegraph', 'models');
 
 // Dynamic import for @xenova/transformers (ESM-only package)
 // We use dynamic import to support CommonJS builds
@@ -89,7 +93,7 @@ export class TextEmbedder {
 
   constructor(options: EmbedderOptions = {}) {
     this.modelId = options.modelId || DEFAULT_MODEL;
-    this.cacheDir = options.cacheDir || '.codegraph/models';
+    this.cacheDir = options.cacheDir || GLOBAL_MODELS_DIR;
     this.showProgress = options.showProgress ?? false;
   }