1
0
Эх сурвалжийг харах

feat(extraction): enable same-file value-reference edges by default (TS/JS) (#895)

Value-reference edges (same-file `references` edges from a reader to the
file-scope const/var it reads) shipped behind CODEGRAPH_VALUE_REFS pending an
agent A/B. The A/B is in: on excalidraw the edges are correct and precise (node
count unchanged) and they transform the impact/blast-radius API — `impact` on a
const consumed by 103 readers goes from 1 affected symbol to the full radius.
That blast-radius API is what `codegraph impact` and CodeGraph Pro's verdict
engine consume, so the win is impact correctness; the agent path showed no
regression. Flip the default on; CODEGRAPH_VALUE_REFS=0 disables.

Also close the one precision gap the A/B surfaced: a bundled/Emscripten
`const Module` re-declared as an inner `var Module` / param produced false
positives (nested readers resolve to the inner binding). isGeneratedFile() is
path-only and can't catch content-minified bundles, so prune SHADOWED targets at
the syntax level — drop any value-ref target whose name is bound by more than one
`variable_declarator` in the file. On excalidraw this removes the 23 false
positives while preserving every real reader (impact unchanged at 170).

Adds regression coverage (there was none): same-file readers are edged, they
surface in the impact radius, shadowed consts are NOT edged, and
CODEGRAPH_VALUE_REFS=0 emits nothing.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Colby Mchenry 1 долоо хоног өмнө
parent
commit
2f6316500d

+ 4 - 0
CHANGELOG.md

@@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+### New Features
+
+- Impact and blast-radius analysis for TypeScript/JavaScript now understands the readers of a constant. When you change a file-scope `const`/`var` — a config object, a lookup table, a shared constant — the other symbols in that file that read it now show up as affected, where before they were invisible (impact only followed calls, imports, and inheritance, so a constant's consumers looked like "nothing depends on this"). This makes `codegraph impact`, and the impact trail in `codegraph_explore`/`codegraph_node`, catch the "change this table, break its readers" class of change. It's on by default for TS/JS and adds no nodes to your graph; bundled/minified files and ambiguously-shadowed names are skipped to keep results precise. Set `CODEGRAPH_VALUE_REFS=0` to turn it off.
+
 ### Fixes
 
 - `codegraph index` now rebuilds the full graph from scratch, so it produces the same result as a fresh `codegraph init` instead of reporting "0 nodes, 0 edges" and looking like it wiped your index. Previously, re-running `index` on an unchanged project skipped every file (their contents hadn't changed) and showed an empty-looking summary; it now clears and re-indexes for an honest, complete rebuild every time. Use `codegraph sync` for fast incremental updates between full rebuilds. Thanks @Arc-univer. (#874)

+ 117 - 0
__tests__/value-reference-edges.test.ts

@@ -0,0 +1,117 @@
+/**
+ * Value-reference edges (TS/JS): same-file `references` edges from a reader
+ * symbol to the file-scope const/var it reads, so impact analysis catches
+ * "change this constant, affect its readers". Default on; CODEGRAPH_VALUE_REFS=0
+ * disables. See TreeSitterExtractor.flushValueRefs.
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import CodeGraph from '../src';
+
+function valueRefReaders(cg: CodeGraph, constName: string): string[] {
+  const target = cg.searchNodes(constName).map((r) => r.node).find((n) => n.name === constName);
+  if (!target) return [];
+  return cg
+    .getIncomingEdges(target.id)
+    .filter((e) => e.kind === 'references' && (e.metadata as { valueRef?: boolean } | undefined)?.valueRef)
+    .map((e) => cg.getNode(e.source)?.name)
+    .filter((n): n is string => Boolean(n));
+}
+
+describe('value-reference edges', () => {
+  let dir: string;
+  let cg: CodeGraph | undefined;
+
+  beforeEach(() => {
+    dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-valueref-'));
+  });
+  afterEach(() => {
+    cg?.destroy();
+    cg = undefined;
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  function index(): CodeGraph {
+    const g = CodeGraph.initSync(dir, { config: { include: ['**/*.ts', '**/*.tsx'], exclude: [] } });
+    return g;
+  }
+
+  it('edges same-file readers to the file-scope const they read (default on)', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'config.ts'),
+      [
+        'export const TABLE_CONFIG = { rows: 10, cols: 4 };',
+        'export function rowCount() { return TABLE_CONFIG.rows; }',
+        'export function describeTable() { return `${TABLE_CONFIG.rows}x${TABLE_CONFIG.cols}`; }',
+        'export const HEADER = TABLE_CONFIG.cols;',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    const readers = valueRefReaders(cg, 'TABLE_CONFIG');
+    // rowCount, describeTable, and the HEADER const all read TABLE_CONFIG.
+    expect(readers).toEqual(expect.arrayContaining(['rowCount', 'describeTable', 'HEADER']));
+  });
+
+  it('surfaces those readers in the impact radius of the const', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'palette.ts'),
+      [
+        'export const COLOR_PALETTE = { red: "#f00", blue: "#00f" };',
+        'export function pickRed() { return COLOR_PALETTE.red; }',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    const target = cg.searchNodes('COLOR_PALETTE').map((r) => r.node).find((n) => n.name === 'COLOR_PALETTE')!;
+    const impacted = [...cg.getImpactRadius(target.id).nodes.values()].map((n) => n.name);
+    expect(impacted).toContain('pickRed');
+  });
+
+  it('does NOT edge a shadowed const — inner re-declaration makes the name ambiguous', async () => {
+    // The Emscripten/bundled pattern: a file-scope `const Module` re-declared as
+    // an inner `var Module` / param. Nested readers resolve to the INNER binding,
+    // so a file-scope edge would be a false positive. The shadow guard drops it.
+    fs.writeFileSync(
+      path.join(dir, 'bundled.ts'),
+      [
+        'const Module = (function () {',
+        '  return function (Module) {',
+        '    var Module = typeof Module !== "undefined" ? Module : {};',
+        '    function locate() { return Module.path; }',
+        '    function getFunc() { return Module.lookup; }',
+        '    return { locate, getFunc };',
+        '  };',
+        '})();',
+        'export default Module;',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    // No reader should be edged to the outer `const Module`.
+    expect(valueRefReaders(cg, 'Module')).toEqual([]);
+  });
+
+  it('emits nothing when CODEGRAPH_VALUE_REFS=0', async () => {
+    const prev = process.env.CODEGRAPH_VALUE_REFS;
+    process.env.CODEGRAPH_VALUE_REFS = '0';
+    try {
+      fs.writeFileSync(
+        path.join(dir, 'config.ts'),
+        ['export const TABLE_CONFIG = { rows: 10 };', 'export function rowCount() { return TABLE_CONFIG.rows; }'].join('\n'),
+      );
+      cg = index();
+      await cg.indexAll();
+      expect(valueRefReaders(cg, 'TABLE_CONFIG')).toEqual([]);
+    } finally {
+      if (prev === undefined) delete process.env.CODEGRAPH_VALUE_REFS;
+      else process.env.CODEGRAPH_VALUE_REFS = prev;
+    }
+  });
+});

+ 38 - 5
src/extraction/tree-sitter.ts

@@ -221,11 +221,12 @@ export class TreeSitterExtractor {
   private nodes: Node[] = [];
   private edges: Edge[] = [];
   private unresolvedReferences: UnresolvedReference[] = [];
-  // Value-reference edges (flag-gated, default off; see flushValueRefs). Same-file reads of
-  // file-scope const/var symbols → `references` edges so impact analysis catches const consumers.
+  // Value-reference edges (default ON; set CODEGRAPH_VALUE_REFS=0 to disable; see flushValueRefs).
+  // Same-file reads of file-scope const/var symbols → `references` edges so impact analysis catches
+  // value consumers ("change this constant/table, affect its readers").
   private static readonly VALUE_REF_LANGS = new Set<string>(['typescript', 'javascript', 'tsx']);
   private static readonly MAX_VALUE_REF_NODES = 20_000;
-  private readonly valueRefsEnabled = process.env.CODEGRAPH_VALUE_REFS === '1';
+  private readonly valueRefsEnabled = process.env.CODEGRAPH_VALUE_REFS !== '0';
   private fileScopeValues = new Map<string, string>();
   private valueRefScopes: Array<{ id: string; node: SyntaxNode }> = [];
   private errors: ExtractionError[] = [];
@@ -544,8 +545,8 @@ export class TreeSitterExtractor {
    * The engine doesn't edge const→consumer, so impact analysis misses "change this table, affect
    * its readers" (the ReScript-PR false positive). Same-file only (resolution is unambiguous),
    * distinctive target names only (dodges the local-shadowing precision trap documented on
-   * function_ref), deduped per (reader, target). Flag-gated (CODEGRAPH_VALUE_REFS) + additive —
-   * pending the agent A/B before it goes default-on.
+   * function_ref), deduped per (reader, target). Default on (CODEGRAPH_VALUE_REFS=0 disables) +
+   * additive. Shadowed targets are pruned — see below.
    */
   private flushValueRefs(): void {
     const scopes = this.valueRefScopes;
@@ -555,6 +556,38 @@ export class TreeSitterExtractor {
     if (!this.valueRefsEnabled || !TreeSitterExtractor.VALUE_REF_LANGS.has(this.language)) return;
     if (targets.size === 0 || scopes.length === 0 || isGeneratedFile(this.filePath)) return;
 
+    // Prune SHADOWED targets. A name bound more than once in the file (e.g. a
+    // bundled/Emscripten `const Module` re-declared as an inner `var Module` /
+    // function param) resolves to the INNER binding for nested readers, so a
+    // file-scope edge to it is a false positive. Those inner re-declarations
+    // aren't extracted as graph nodes, so detect them at the syntax level:
+    // count `variable_declarator` names across the tree and drop any target
+    // bound twice or more. Single-binding (unambiguous) names are kept. This
+    // complements the path-based isGeneratedFile() check for content-minified
+    // bundles it can't catch by suffix.
+    if (this.tree) {
+      const declCounts = new Map<string, number>();
+      const dstack: SyntaxNode[] = [this.tree.rootNode];
+      let dvisited = 0;
+      while (dstack.length > 0 && dvisited < TreeSitterExtractor.MAX_VALUE_REF_NODES) {
+        const n = dstack.pop()!;
+        dvisited++;
+        if (n.type === 'variable_declarator') {
+          const nameNode = n.namedChild(0);
+          if (nameNode && nameNode.type === 'identifier') {
+            const nm = getNodeText(nameNode, this.source);
+            if (targets.has(nm)) declCounts.set(nm, (declCounts.get(nm) ?? 0) + 1);
+          }
+        }
+        for (let i = 0; i < n.namedChildCount; i++) {
+          const c = n.namedChild(i);
+          if (c) dstack.push(c);
+        }
+      }
+      for (const [nm, c] of declCounts) if (c > 1) targets.delete(nm);
+      if (targets.size === 0) return;
+    }
+
     for (const scope of scopes) {
       const seen = new Set<string>();
       const stack: SyntaxNode[] = [scope.node];