ソースを参照

Add Liquid template language support for Shopify themes

- Add 'liquid' to Language type and default include patterns
- Create LiquidExtractor with regex-based extraction (tree-sitter-liquid is ABI-incompatible with tree-sitter 0.22+, so its grammar is not loaded)
- Extract render/include/section references as component nodes
- Extract schema blocks as constant nodes with parsed names
- Extract assign statements as variable nodes
- Create file relationship edges for snippet/section references

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Colby McHenry 5 ヶ月 前
コミット
a4ddedb197
5 ファイル変更 343 行追加 6 行削除
  1. 21 2
      package-lock.json
  2. 3 2
      package.json
  3. 11 1
      src/extraction/grammars.ts
  4. 305 1
      src/extraction/tree-sitter.ts
  5. 3 0
      src/types.ts

+ 21 - 2
package-lock.json

@@ -1,12 +1,12 @@
 {
   "name": "@colbymchenry/codegraph",
-  "version": "0.1.9",
+  "version": "0.2.6",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@colbymchenry/codegraph",
-      "version": "0.1.9",
+      "version": "0.2.6",
       "license": "MIT",
       "dependencies": {
         "@xenova/transformers": "^2.17.0",
@@ -22,6 +22,7 @@
         "tree-sitter-java": "^0.23.5",
         "tree-sitter-javascript": "^0.23.1",
         "tree-sitter-kotlin": "^0.3.8",
+        "tree-sitter-liquid": "github:hankthetank27/tree-sitter-liquid",
         "tree-sitter-php": "^0.23.11",
         "tree-sitter-python": "^0.23.6",
         "tree-sitter-ruby": "^0.23.1",
@@ -2431,6 +2432,24 @@
       "integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==",
       "license": "MIT"
     },
+    "node_modules/tree-sitter-liquid": {
+      "version": "0.1.0",
+      "resolved": "git+ssh://git@github.com/hankthetank27/tree-sitter-liquid.git#d6ebde3974742cd1b61b55d1d94aab1dacb41056",
+      "hasInstallScript": true,
+      "license": "MIT",
+      "dependencies": {
+        "node-addon-api": "^8.0.0",
+        "node-gyp-build": "^4.8.1"
+      },
+      "peerDependencies": {
+        "tree-sitter": "^0.21.1"
+      },
+      "peerDependenciesMeta": {
+        "tree_sitter": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/tree-sitter-php": {
       "version": "0.23.12",
       "resolved": "https://registry.npmjs.org/tree-sitter-php/-/tree-sitter-php-0.23.12.tgz",

+ 3 - 2
package.json

@@ -1,6 +1,6 @@
 {
   "name": "@colbymchenry/codegraph",
-  "version": "0.2.5",
+  "version": "0.2.6",
   "description": "A local-first code intelligence system that builds a semantic knowledge graph from any codebase",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -31,9 +31,9 @@
   "license": "MIT",
   "dependencies": {
     "@xenova/transformers": "^2.17.0",
-    "figlet": "^1.8.0",
     "better-sqlite3": "^11.0.0",
     "commander": "^14.0.2",
+    "figlet": "^1.8.0",
     "sqlite-vss": "^0.1.2",
     "tree-sitter": "^0.22.4",
     "tree-sitter-c": "^0.23.4",
@@ -43,6 +43,7 @@
     "tree-sitter-java": "^0.23.5",
     "tree-sitter-javascript": "^0.23.1",
     "tree-sitter-kotlin": "^0.3.8",
+    "tree-sitter-liquid": "github:hankthetank27/tree-sitter-liquid",
     "tree-sitter-php": "^0.23.11",
     "tree-sitter-python": "^0.23.6",
     "tree-sitter-ruby": "^0.23.1",

+ 11 - 1
src/extraction/grammars.ts

@@ -36,6 +36,8 @@ const Ruby = require('tree-sitter-ruby');
 const Swift = require('tree-sitter-swift');
 // eslint-disable-next-line @typescript-eslint/no-require-imports
 const Kotlin = require('tree-sitter-kotlin');
+// Note: tree-sitter-liquid has ABI compatibility issues with tree-sitter 0.22+
+// Liquid extraction is handled separately via regex in tree-sitter.ts
 
 /**
  * Mapping of Language to tree-sitter grammar
@@ -56,6 +58,7 @@ const GRAMMAR_MAP: Record<string, unknown> = {
   ruby: Ruby,
   swift: Swift,
   kotlin: Kotlin,
+  // liquid: uses custom regex-based extraction, not tree-sitter
 };
 
 /**
@@ -87,6 +90,7 @@ export const EXTENSION_MAP: Record<string, Language> = {
   '.swift': 'swift',
   '.kt': 'kotlin',
   '.kts': 'kotlin',
+  '.liquid': 'liquid',
 };
 
 /**
@@ -129,6 +133,8 @@ export function detectLanguage(filePath: string): Language {
  * Check if a language is supported
  */
 export function isLanguageSupported(language: Language): boolean {
+  // Liquid has no entry in GRAMMAR_MAP (it is extracted with regexes rather
+  // than a tree-sitter grammar), so it has to be reported as supported here.
+  if (language === 'liquid') return true;
   return language !== 'unknown' && language in GRAMMAR_MAP;
 }
 
@@ -136,7 +142,10 @@ export function isLanguageSupported(language: Language): boolean {
  * Get all supported languages
  */
 export function getSupportedLanguages(): Language[] {
-  return Object.keys(GRAMMAR_MAP) as Language[];
+  const languages = Object.keys(GRAMMAR_MAP) as Language[];
+  // Liquid is not a GRAMMAR_MAP key (it uses custom regex-based extraction),
+  // so append it explicitly to the list derived from the map.
+  languages.push('liquid');
+  return languages;
 }
 
 /**
@@ -166,6 +175,7 @@ export function getLanguageDisplayName(language: Language): string {
     ruby: 'Ruby',
     swift: 'Swift',
     kotlin: 'Kotlin',
+    liquid: 'Liquid',
     unknown: 'Unknown',
   };
   return names[language] || language;

+ 305 - 1
src/extraction/tree-sitter.ts

@@ -1216,6 +1216,302 @@ export class TreeSitterExtractor {
   }
 }
 
+/**
+ * LiquidExtractor - Extracts relationships from Liquid template files
+ *
+ * Liquid is a templating language (used by Shopify, Jekyll, etc.) that doesn't
+ * have traditional functions or classes. Extraction is purely regex-based (no
+ * tree-sitter grammar is involved) and produces:
+ * - a `file` node for the template itself
+ * - `component` nodes for snippet references ({% render 'name' %} and {% include 'name' %})
+ * - `component` nodes for section references ({% section 'name' %})
+ * - `constant` nodes for schema blocks ({% schema %}...{% endschema %})
+ * - `variable` nodes for assignments ({% assign name = ... %})
+ *
+ * Every extracted node gets a `contains` edge from the file node. Snippet and
+ * section references additionally record an unresolved `references` entry
+ * pointing at `snippets/<name>.liquid` or `sections/<name>.liquid`.
+ */
+export class LiquidExtractor {
+  private filePath: string;
+  private source: string;
+  /** Offset of the first character of every line; index 0 belongs to line 1. */
+  private lineStarts: number[];
+  private nodes: Node[] = [];
+  private edges: Edge[] = [];
+  private unresolvedReferences: UnresolvedReference[] = [];
+  private errors: ExtractionError[] = [];
+
+  /**
+   * @param filePath Path of the template; used for node IDs and qualified names
+   * @param source Full text of the template
+   */
+  constructor(filePath: string, source: string) {
+    this.filePath = filePath;
+    this.source = source;
+    // Computed once so that line/column lookups do not rescan the source
+    // for every regex match.
+    this.lineStarts = LiquidExtractor.computeLineStarts(source);
+  }
+
+  /**
+   * Extract from Liquid source
+   *
+   * Any exception thrown while extracting is recorded in `errors` instead of
+   * propagating; nodes and edges collected before the failure are still returned.
+   *
+   * @returns The collected nodes, edges, unresolved references and errors,
+   *          plus the elapsed time in milliseconds
+   */
+  extract(): ExtractionResult {
+    const startTime = Date.now();
+
+    try {
+      // Create file node
+      const fileNode = this.createFileNode();
+
+      // Extract render/include statements (snippet references)
+      this.extractSnippetReferences(fileNode.id);
+
+      // Extract section references
+      this.extractSectionReferences(fileNode.id);
+
+      // Extract schema block
+      this.extractSchema(fileNode.id);
+
+      // Extract assign statements as variables
+      this.extractAssignments(fileNode.id);
+    } catch (error) {
+      this.errors.push({
+        message: `Liquid extraction error: ${error instanceof Error ? error.message : String(error)}`,
+        severity: 'error',
+      });
+    }
+
+    return {
+      nodes: this.nodes,
+      edges: this.edges,
+      unresolvedReferences: this.unresolvedReferences,
+      errors: this.errors,
+      durationMs: Date.now() - startTime,
+    };
+  }
+
+  /**
+   * Create a file node for the Liquid template
+   *
+   * The node spans from line 1, column 0 to the end of the last line.
+   */
+  private createFileNode(): Node {
+    const lastLine = this.lineStarts.length;
+    const lastLineStart = this.lineStarts[lastLine - 1] ?? 0;
+    const id = generateNodeId(this.filePath, 'file', this.filePath, 1);
+
+    const fileNode: Node = {
+      id,
+      kind: 'file',
+      name: this.filePath.split('/').pop() || this.filePath,
+      qualifiedName: this.filePath,
+      filePath: this.filePath,
+      language: 'liquid',
+      startLine: 1,
+      endLine: lastLine,
+      startColumn: 0,
+      endColumn: this.source.length - lastLineStart,
+      updatedAt: Date.now(),
+    };
+
+    this.nodes.push(fileNode);
+    return fileNode;
+  }
+
+  /**
+   * Extract {% render 'snippet' %} and {% include 'snippet' %} references
+   */
+  private extractSnippetReferences(fileNodeId: string): void {
+    // Match {% render 'name' %} or {% include 'name' %} with optional parameters
+    const renderRegex = /\{%[-]?\s*(render|include)\s+['"]([^'"]+)['"]/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = renderRegex.exec(this.source)) !== null) {
+      const tagType = match[1];
+      const snippetName = match[2];
+      // Both groups are mandatory in the pattern; the check only narrows the types.
+      if (tagType === undefined || snippetName === undefined) continue;
+
+      this.addReference(fileNodeId, match, tagType, snippetName, 'snippets');
+    }
+  }
+
+  /**
+   * Extract {% section 'name' %} references
+   */
+  private extractSectionReferences(fileNodeId: string): void {
+    // Match {% section 'name' %}
+    const sectionRegex = /\{%[-]?\s*section\s+['"]([^'"]+)['"]/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = sectionRegex.exec(this.source)) !== null) {
+      const sectionName = match[1];
+      // The group is mandatory in the pattern; the check only narrows the type.
+      if (sectionName === undefined) continue;
+
+      this.addReference(fileNodeId, match, 'section', sectionName, 'sections');
+    }
+  }
+
+  /**
+   * Record one snippet/section reference: a component node, its containment
+   * edge from the file node, and an unresolved reference to the target file.
+   *
+   * @param fileNodeId ID of the file node that contains the reference
+   * @param match Regex match of the opening part of the tag
+   * @param tagType Tag keyword (`render`, `include` or `section`)
+   * @param targetName Name given inside the quotes
+   * @param directory Directory prefix of the referenced file (`snippets` or `sections`)
+   */
+  private addReference(
+    fileNodeId: string,
+    match: RegExpExecArray,
+    tagType: string,
+    targetName: string,
+    directory: string
+  ): void {
+    const line = this.getLineNumber(match.index);
+    const startColumn = match.index - this.getLineStart(line);
+
+    // Create a component node for the reference
+    const nodeId = generateNodeId(this.filePath, 'component', `${tagType}:${targetName}`, line);
+
+    const node: Node = {
+      id: nodeId,
+      kind: 'component',
+      name: targetName,
+      qualifiedName: `${this.filePath}::${tagType}:${targetName}`,
+      filePath: this.filePath,
+      language: 'liquid',
+      startLine: line,
+      endLine: line,
+      startColumn,
+      endColumn: startColumn + match[0].length,
+      updatedAt: Date.now(),
+    };
+
+    this.nodes.push(node);
+
+    // Add containment edge from file
+    this.edges.push({
+      source: fileNodeId,
+      target: nodeId,
+      kind: 'contains',
+    });
+
+    // Add unresolved reference to the referenced template file
+    this.unresolvedReferences.push({
+      fromNodeId: fileNodeId,
+      referenceName: `${directory}/${targetName}.liquid`,
+      referenceKind: 'references',
+      line,
+      column: startColumn,
+    });
+  }
+
+  /**
+   * Extract {% schema %}...{% endschema %} blocks
+   *
+   * The node is named after the `name` property of the schema JSON when that
+   * is available, and `schema` otherwise.
+   */
+  private extractSchema(fileNodeId: string): void {
+    // Match {% schema %}...{% endschema %}
+    const schemaRegex = /\{%[-]?\s*schema\s*[-]?%\}([\s\S]*?)\{%[-]?\s*endschema\s*[-]?%\}/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = schemaRegex.exec(this.source)) !== null) {
+      const schemaContent = match[1] ?? '';
+      const endOffset = match.index + match[0].length;
+      const startLine = this.getLineNumber(match.index);
+      const endLine = this.getLineNumber(endOffset);
+
+      const schemaName = this.parseSchemaName(schemaContent);
+
+      // Create a node for the schema
+      const nodeId = generateNodeId(this.filePath, 'constant', `schema:${schemaName}`, startLine);
+
+      const node: Node = {
+        id: nodeId,
+        kind: 'constant',
+        name: schemaName,
+        qualifiedName: `${this.filePath}::schema:${schemaName}`,
+        filePath: this.filePath,
+        language: 'liquid',
+        startLine,
+        endLine,
+        startColumn: match.index - this.getLineStart(startLine),
+        // Column just past the closing tag, measured on the block's last line
+        endColumn: endOffset - this.getLineStart(endLine),
+        docstring: schemaContent.trim().substring(0, 200), // Store first 200 chars as docstring
+        updatedAt: Date.now(),
+      };
+
+      this.nodes.push(node);
+
+      // Add containment edge from file
+      this.edges.push({
+        source: fileNodeId,
+        target: nodeId,
+        kind: 'contains',
+      });
+    }
+  }
+
+  /**
+   * Read the `name` property from the JSON body of a schema block
+   *
+   * @returns The name when the body is a JSON object with a non-empty string
+   *          `name`; otherwise the default `schema`
+   */
+  private parseSchemaName(schemaContent: string): string {
+    try {
+      const parsed: unknown = JSON.parse(schemaContent);
+      if (typeof parsed === 'object' && parsed !== null) {
+        const name = (parsed as { name?: unknown }).name;
+        // Only a string is usable as a node name; anything else keeps the default.
+        if (typeof name === 'string' && name.length > 0) {
+          return name;
+        }
+      }
+    } catch {
+      // Schema isn't valid JSON, use default name
+    }
+    return 'schema';
+  }
+
+  /**
+   * Extract {% assign var = value %} statements
+   */
+  private extractAssignments(fileNodeId: string): void {
+    // Match {% assign variable_name = ... %}
+    const assignRegex = /\{%[-]?\s*assign\s+(\w+)\s*=/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = assignRegex.exec(this.source)) !== null) {
+      const variableName = match[1];
+      // The group is mandatory in the pattern; the check only narrows the type.
+      if (variableName === undefined) continue;
+
+      const line = this.getLineNumber(match.index);
+      const startColumn = match.index - this.getLineStart(line);
+
+      // Create a variable node
+      const nodeId = generateNodeId(this.filePath, 'variable', variableName, line);
+
+      const node: Node = {
+        id: nodeId,
+        kind: 'variable',
+        name: variableName,
+        qualifiedName: `${this.filePath}::${variableName}`,
+        filePath: this.filePath,
+        language: 'liquid',
+        startLine: line,
+        endLine: line,
+        startColumn,
+        endColumn: startColumn + match[0].length,
+        updatedAt: Date.now(),
+      };
+
+      this.nodes.push(node);
+
+      // Add containment edge from file
+      this.edges.push({
+        source: fileNodeId,
+        target: nodeId,
+        kind: 'contains',
+      });
+    }
+  }
+
+  /**
+   * Collect the offset at which each line of `source` begins
+   *
+   * Lines are separated by `\n`; the first entry is always 0.
+   */
+  private static computeLineStarts(source: string): number[] {
+    const starts = [0];
+    let newline = source.indexOf('\n');
+    while (newline !== -1) {
+      starts.push(newline + 1);
+      newline = source.indexOf('\n', newline + 1);
+    }
+    return starts;
+  }
+
+  /**
+   * Get the 1-based line number for a character index
+   */
+  private getLineNumber(index: number): number {
+    // Binary search for the number of line starts that are <= index.
+    let low = 0;
+    let high = this.lineStarts.length;
+    while (low < high) {
+      const mid = (low + high) >>> 1;
+      if (this.lineStarts[mid]! <= index) {
+        low = mid + 1;
+      } else {
+        high = mid;
+      }
+    }
+    // A negative index still maps to line 1.
+    return Math.max(low, 1);
+  }
+
+  /**
+   * Get the character index of the start of a 1-based line
+   *
+   * A line number past the end of the source yields `source.length + 1`.
+   */
+  private getLineStart(lineNumber: number): number {
+    if (lineNumber <= 1) {
+      return 0;
+    }
+    return this.lineStarts[lineNumber - 1] ?? this.source.length + 1;
+  }
+}
+
 /**
  * Extract nodes and edges from source code
  */
@@ -1224,6 +1520,14 @@ export function extractFromSource(
   source: string,
   language?: Language
 ): ExtractionResult {
-  const extractor = new TreeSitterExtractor(filePath, source, language);
+  const detectedLanguage = language || detectLanguage(filePath);
+
+  // Use custom extractor for Liquid
+  if (detectedLanguage === 'liquid') {
+    const extractor = new LiquidExtractor(filePath, source);
+    return extractor.extract();
+  }
+
+  const extractor = new TreeSitterExtractor(filePath, source, detectedLanguage);
   return extractor.extract();
 }

+ 3 - 0
src/types.ts

@@ -71,6 +71,7 @@ export type Language =
   | 'ruby'
   | 'swift'
   | 'kotlin'
+  | 'liquid'
   | 'unknown';
 
 // =============================================================================
@@ -496,6 +497,8 @@ export const DEFAULT_CONFIG: CodeGraphConfig = {
     '**/*.php',
     // Ruby
     '**/*.rb',
+    // Liquid (Shopify themes)
+    '**/*.liquid',
   ],
   exclude: [
     // Version control