Explorar el Código

feat: Extract type references from annotations and improve symbol query matching

Adds type-annotation parsing that records `references` edges for parameter types, return types, variable type annotations, and type alias values in TypeScript and other typed languages. Expands symbol extraction from queries to capture lowercase identifiers, and filters out more common English words. Removes the obsolete search utility tests.
Colby McHenry hace 2 meses
padre
commit
4af51f565b
Se han modificado 5 ficheros con 141 adiciones y 150 borrados
  1. 1 1
      CLAUDE.md
  2. 0 135
      __tests__/search.test.ts
  3. 19 2
      src/context/index.ts
  4. 118 2
      src/extraction/tree-sitter.ts
  5. 3 10
      src/installer/claude-md-template.ts

+ 1 - 1
CLAUDE.md

@@ -128,7 +128,7 @@ codegraph serve --mcp       # Start MCP server
 
 ## MCP Tools Best Practices
 
-These tools are designed to be used by **Explore agents** for faster codebase exploration:
+Use these tools **directly in the main session** for fast code exploration (replaces the need for Explore agents in most cases):
 
 | Tool | Use For |
 |------|---------|

+ 0 - 135
__tests__/search.test.ts

@@ -1,135 +0,0 @@
-/**
- * Search Query Utilities Tests
- *
- * Tests multi-signal scoring, kind bonuses, and path relevance.
- */
-
-import { describe, it, expect } from 'vitest';
-import {
-  extractSearchTerms,
-  scorePathRelevance,
-  kindBonus,
-  STOP_WORDS,
-} from '../src/search/query-utils';
-
-describe('Search Query Utilities', () => {
-  describe('extractSearchTerms', () => {
-    it('should extract meaningful terms from a query', () => {
-      const terms = extractSearchTerms('find the login handler');
-      expect(terms).toContain('login');
-      expect(terms).toContain('handler');
-      // 'find' and 'the' are stop words
-      expect(terms).not.toContain('find');
-      expect(terms).not.toContain('the');
-    });
-
-    it('should filter stop words', () => {
-      const terms = extractSearchTerms('how does the authentication work');
-      expect(terms).not.toContain('how');
-      expect(terms).not.toContain('does');
-      expect(terms).not.toContain('the');
-      expect(terms).toContain('authentication');
-      expect(terms).toContain('work');
-    });
-
-    it('should handle camelCase by lowercasing', () => {
-      const terms = extractSearchTerms('UserService');
-      expect(terms).toContain('userservice');
-    });
-
-    it('should strip punctuation', () => {
-      const terms = extractSearchTerms('payment.process()');
-      expect(terms).toContain('payment');
-      expect(terms).toContain('process');
-    });
-
-    it('should return empty for all stop words', () => {
-      const terms = extractSearchTerms('how do I get the');
-      expect(terms).toHaveLength(0);
-    });
-
-    it('should filter single-character terms', () => {
-      const terms = extractSearchTerms('a b c auth');
-      expect(terms).toEqual(['auth']);
-    });
-  });
-
-  describe('scorePathRelevance', () => {
-    it('should score filename matches highest', () => {
-      const score = scorePathRelevance('src/auth/login.ts', 'login');
-      expect(score).toBeGreaterThanOrEqual(10);
-    });
-
-    it('should score directory matches', () => {
-      const score = scorePathRelevance('src/auth/index.ts', 'auth');
-      expect(score).toBeGreaterThanOrEqual(5);
-    });
-
-    it('should return 0 for unrelated paths', () => {
-      const score = scorePathRelevance('src/utils/format.ts', 'payment');
-      expect(score).toBe(0);
-    });
-
-    it('should accumulate scores for multiple matching terms', () => {
-      const score = scorePathRelevance('src/auth/login.ts', 'auth login');
-      // Both 'auth' (dir match) and 'login' (filename match)
-      expect(score).toBeGreaterThanOrEqual(15);
-    });
-
-    it('should return 0 for empty query terms', () => {
-      const score = scorePathRelevance('src/auth/login.ts', 'the a an');
-      expect(score).toBe(0);
-    });
-  });
-
-  describe('kindBonus', () => {
-    it('should give functions and methods highest bonus', () => {
-      expect(kindBonus('function')).toBe(10);
-      expect(kindBonus('method')).toBe(10);
-    });
-
-    it('should rank functions > classes > variables > imports', () => {
-      expect(kindBonus('function')).toBeGreaterThan(kindBonus('class'));
-      expect(kindBonus('class')).toBeGreaterThan(kindBonus('variable'));
-      expect(kindBonus('variable')).toBeGreaterThan(kindBonus('import'));
-    });
-
-    it('should give routes high priority', () => {
-      expect(kindBonus('route')).toBeGreaterThanOrEqual(9);
-    });
-
-    it('should give components high priority', () => {
-      expect(kindBonus('component')).toBeGreaterThanOrEqual(8);
-    });
-
-    it('should return 0 for parameter and file kinds', () => {
-      expect(kindBonus('parameter')).toBe(0);
-      expect(kindBonus('file')).toBe(0);
-    });
-
-    it('should return 0 for unknown kinds', () => {
-      expect(kindBonus('unknown_kind' as any)).toBe(0);
-    });
-  });
-
-  describe('STOP_WORDS', () => {
-    it('should contain common English stop words', () => {
-      expect(STOP_WORDS.has('the')).toBe(true);
-      expect(STOP_WORDS.has('and')).toBe(true);
-      expect(STOP_WORDS.has('or')).toBe(true);
-    });
-
-    it('should contain action verbs used in queries', () => {
-      expect(STOP_WORDS.has('find')).toBe(true);
-      expect(STOP_WORDS.has('show')).toBe(true);
-      expect(STOP_WORDS.has('get')).toBe(true);
-      expect(STOP_WORDS.has('list')).toBe(true);
-    });
-
-    it('should not contain technical terms', () => {
-      expect(STOP_WORDS.has('function')).toBe(false);
-      expect(STOP_WORDS.has('class')).toBe(false);
-      expect(STOP_WORDS.has('auth')).toBe(false);
-    });
-  });
-});

+ 19 - 2
src/context/index.ts

@@ -83,13 +83,30 @@ function extractSymbolsFromQuery(query: string): string[] {
     }
   }
 
-  // Filter out common English words that might match patterns
+  // Extract plain lowercase identifiers (3+ chars, not already matched)
+  // Catches symbol names like "undo", "redo", "history", "render", "parse"
+  const lowercasePattern = /\b([a-z][a-z0-9]{2,})\b/g;
+  while ((match = lowercasePattern.exec(query)) !== null) {
+    if (match[1]) {
+      symbols.add(match[1]);
+    }
+  }
+
+  // Filter out common English words that aren't likely symbol names
   const commonWords = new Set([
     'the', 'and', 'for', 'with', 'from', 'this', 'that', 'have', 'been',
     'will', 'would', 'could', 'should', 'does', 'done', 'make', 'made',
     'use', 'used', 'using', 'work', 'works', 'find', 'found', 'show',
     'call', 'called', 'calling', 'get', 'set', 'add', 'all', 'any',
-    'how', 'what', 'when', 'where', 'which', 'who', 'why'
+    'how', 'what', 'when', 'where', 'which', 'who', 'why',
+    'not', 'but', 'are', 'was', 'were', 'has', 'had', 'its',
+    'can', 'did', 'may', 'also', 'into', 'than', 'then', 'them',
+    'each', 'other', 'some', 'such', 'only', 'same', 'about',
+    'after', 'before', 'between', 'through', 'during', 'without',
+    'again', 'further', 'once', 'here', 'there', 'both', 'just',
+    'more', 'most', 'very', 'being', 'having', 'doing',
+    'system', 'need', 'needs', 'want', 'wants', 'like', 'look',
+    'change', 'changes', 'changed', 'changing',
   ]);
 
   return Array.from(symbols).filter(s => !commonWords.has(s.toLowerCase()));

+ 118 - 2
src/extraction/tree-sitter.ts

@@ -1215,6 +1215,9 @@ export class TreeSitterExtractor {
     });
     if (!funcNode) return;
 
+    // Extract type annotations (parameter types and return type)
+    this.extractTypeAnnotations(node, funcNode.id);
+
     // Push to stack and visit body
     this.nodeStack.push(funcNode.id);
     // Dart: function_body is a next sibling of function_signature, not a child
@@ -1299,6 +1302,9 @@ export class TreeSitterExtractor {
     });
     if (!methodNode) return;
 
+    // Extract type annotations (parameter types and return type)
+    this.extractTypeAnnotations(node, methodNode.id);
+
     // Push to stack and visit body
     this.nodeStack.push(methodNode.id);
     // Dart: function_body is a next sibling of method_signature, not a child
@@ -1425,11 +1431,16 @@ export class TreeSitterExtractor {
             const initValue = valueNode ? getNodeText(valueNode, this.source).slice(0, 100) : undefined;
             const initSignature = initValue ? `= ${initValue}${initValue.length >= 100 ? '...' : ''}` : undefined;
 
-            this.createNode(kind, name, child, {
+            const varNode = this.createNode(kind, name, child, {
               docstring,
               signature: initSignature,
               isExported,
             });
+
+            // Extract type annotation references (e.g., const x: ITextModel = ...)
+            if (varNode) {
+              this.extractVariableTypeAnnotation(child, varNode.id);
+            }
           }
         }
       }
@@ -1527,10 +1538,20 @@ export class TreeSitterExtractor {
     const docstring = getPrecedingDocstring(node, this.source);
     const isExported = this.extractor.isExported?.(node, this.source);
 
-    this.createNode('type_alias', name, node, {
+    const typeAliasNode = this.createNode('type_alias', name, node, {
       docstring,
       isExported,
     });
+
+    // Extract type references from the alias value (e.g., `type X = ITextModel | null`)
+    if (typeAliasNode && this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) {
+      // The aliased type (everything after the `=`) is read from the node's `value` field
+      // In tree-sitter TS: type_alias_declaration has name + value children
+      const value = getChildByField(node, 'value');
+      if (value) {
+        this.extractTypeRefsFromSubtree(value, typeAliasNode.id);
+      }
+    }
   }
 
   /**
@@ -2103,6 +2124,101 @@ export class TreeSitterExtractor {
     }
   }
 
+  /**
+   * Languages that support type annotations (TypeScript, etc.)
+   */
+  private readonly TYPE_ANNOTATION_LANGUAGES = new Set([
+    'typescript', 'tsx', 'dart', 'kotlin', 'swift', 'rust', 'go', 'java', 'csharp',
+  ]);
+
+  /**
+   * Built-in/primitive type names that shouldn't create references
+   */
+  private readonly BUILTIN_TYPES = new Set([
+    'string', 'number', 'boolean', 'void', 'null', 'undefined', 'never', 'any', 'unknown',
+    'object', 'symbol', 'bigint', 'true', 'false',
+    // Rust
+    'str', 'bool', 'i8', 'i16', 'i32', 'i64', 'i128', 'isize',
+    'u8', 'u16', 'u32', 'u64', 'u128', 'usize', 'f32', 'f64', 'char',
+    // Java/C#
+    'int', 'long', 'short', 'byte', 'float', 'double', 'char',
+    // Go
+    'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64',
+    'float32', 'float64', 'complex64', 'complex128', 'rune', 'error',
+  ]);
+
+  /**
+   * Extract type references from type annotations on a function/method/field node.
+   * Queues unresolved 'references' entries for parameter types, return types, and field types.
+   */
+  private extractTypeAnnotations(node: SyntaxNode, nodeId: string): void {
+    if (!this.extractor) return;
+    if (!this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) return;
+
+    // Extract parameter type annotations
+    const params = getChildByField(node, this.extractor.paramsField || 'parameters');
+    if (params) {
+      this.extractTypeRefsFromSubtree(params, nodeId);
+    }
+
+    // Extract return type annotation
+    const returnType = getChildByField(node, this.extractor.returnField || 'return_type');
+    if (returnType) {
+      this.extractTypeRefsFromSubtree(returnType, nodeId);
+    }
+
+    // Extract direct type annotation (for class fields like `model: ITextModel`)
+    const typeAnnotation = node.namedChildren.find(
+      (c: SyntaxNode) => c.type === 'type_annotation'
+    );
+    if (typeAnnotation) {
+      this.extractTypeRefsFromSubtree(typeAnnotation, nodeId);
+    }
+  }
+
+  /**
+   * Extract type references from a variable's type annotation.
+   */
+  private extractVariableTypeAnnotation(node: SyntaxNode, nodeId: string): void {
+    if (!this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) return;
+
+    // Find type_annotation child (covers TS `: Type`, Rust `: Type`, etc.)
+    const typeAnnotation = node.namedChildren.find(
+      (c: SyntaxNode) => c.type === 'type_annotation'
+    );
+    if (typeAnnotation) {
+      this.extractTypeRefsFromSubtree(typeAnnotation, nodeId);
+    }
+  }
+
+  /**
+   * Recursively walk a subtree and extract all type_identifier references.
+   * Handles unions, intersections, generics, arrays, etc.
+   */
+  private extractTypeRefsFromSubtree(node: SyntaxNode, fromNodeId: string): void {
+    if (node.type === 'type_identifier') {
+      const typeName = getNodeText(node, this.source);
+      if (typeName && !this.BUILTIN_TYPES.has(typeName)) {
+        this.unresolvedReferences.push({
+          fromNodeId,
+          referenceName: typeName,
+          referenceKind: 'references',
+          line: node.startPosition.row + 1,
+          column: node.startPosition.column,
+        });
+      }
+      return; // type_identifier is a leaf
+    }
+
+    // Recurse into children (handles union_type, intersection_type, generic_type, etc.)
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (child) {
+        this.extractTypeRefsFromSubtree(child, fromNodeId);
+      }
+    }
+  }
+
   /**
    * Handle Pascal-specific AST structures.
    * Returns true if the node was fully handled and children should be skipped.

+ 3 - 10
src/installer/claude-md-template.ts

@@ -16,25 +16,18 @@ CodeGraph builds a semantic knowledge graph of codebases for faster, smarter cod
 
 ### If \`.codegraph/\` exists in the project
 
-**Use codegraph tools for faster exploration.** These tools provide instant lookups via the code graph instead of scanning files:
+**Use codegraph tools directly in the main session.** Codegraph replaces the need for Explore agents in most cases. Instead of spawning an agent (which takes 30+ tool calls and 1+ minutes), use codegraph MCP tools directly for fast, structured answers:
 
 | Tool | Use For |
 |------|---------|
+| \`codegraph_context\` | Get relevant code context for a task (great starting point) |
 | \`codegraph_search\` | Find symbols by name (functions, classes, types) |
-| \`codegraph_context\` | Get relevant code context for a task |
 | \`codegraph_callers\` | Find what calls a function |
 | \`codegraph_callees\` | Find what a function calls |
 | \`codegraph_impact\` | See what's affected by changing a symbol |
 | \`codegraph_node\` | Get details + source code for a symbol |
 
-**When spawning Explore agents in a codegraph-enabled project:**
-
-Tell the Explore agent to use codegraph tools for faster exploration.
-
-**For quick lookups in the main session:**
-- Use \`codegraph_search\` instead of grep for finding symbols
-- Use \`codegraph_callers\`/\`codegraph_callees\` to trace code flow
-- Use \`codegraph_impact\` before making changes to see what's affected
+**Do NOT tell Explore agents to use codegraph tools.** Testing shows Explore agents use codegraph for discovery then still read all the same files — making them slower, not faster. Codegraph's value is in the main session where it replaces the need for exhaustive file reading.
 
 ### If \`.codegraph/\` does NOT exist