Kaynağa Gözat

feat(extraction): same-file value-reference edges for impact analysis — 15 languages (#897)

Adds same-file value-reference edges (reader symbol → const/var it reads) so impact analysis catches a constant's same-file consumers, closing the 'change this table, break its readers' hole. 15 languages validated S/M/L on public OSS: TS/JS/tsx, Go, Python, Rust, Ruby, C, Java, C#, PHP, Scala, Kotlin, Swift, Dart, Pascal/Delphi (+ Svelte/Vue/Astro inherited). Edges-only — node count identical on/off; default ON, CODEGRAPH_VALUE_REFS=0 opts out.
Colby Mchenry 6 gün önce
ebeveyn
işleme
f34f606342

+ 5 - 1
CHANGELOG.md

@@ -11,7 +11,11 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
-- Impact and blast-radius analysis for TypeScript/JavaScript now understands the readers of a constant. When you change a file-scope `const`/`var` — a config object, a lookup table, a shared constant — the other symbols in that file that read it now show up as affected, where before they were invisible (impact only followed calls, imports, and inheritance, so a constant's consumers looked like "nothing depends on this"). This makes `codegraph impact`, and the impact trail in `codegraph_explore`/`codegraph_node`, catch the "change this table, break its readers" class of change. It's on by default for TS/JS and adds no nodes to your graph; bundled/minified files and ambiguously-shadowed names are skipped to keep results precise. Set `CODEGRAPH_VALUE_REFS=0` to turn it off.
+- Impact and blast-radius analysis for TypeScript, JavaScript, Go, Python, Rust, Ruby, C, Java, C#, PHP, Scala, Kotlin, Swift, Dart, and Pascal/Delphi now understands the readers of a constant. When you change a file-scope, package-level, module-level, or class-level constant — a config object, a lookup table, a shared constant — the other symbols in that file that read it now show up as affected, where before they were invisible (impact only followed calls, imports, and inheritance, so a constant's consumers looked like "nothing depends on this"). This makes `codegraph impact`, and the impact trail in `codegraph_explore`/`codegraph_node`, catch the "change this table, break its readers" class of change. It's on by default and adds no nodes to your graph; bundled/minified files and ambiguously-shadowed names are skipped to keep results precise. Set `CODEGRAPH_VALUE_REFS=0` to turn it off.
+- C file-scope constants and globals — `static const` scalars, pointer/array lookup tables, and shared mutable globals — are now recognized as symbols in their own right. They previously weren't extracted at all, so they never appeared in search or carried any dependents; now they show up in `codegraph search` and participate in impact analysis (see above), so changing a C lookup table surfaces the same-file functions that read it.
+- Java `static final` constants, C# `const` / `static readonly` constants, Scala `object` vals, and Kotlin top-level / `object` / `companion object` `val`s are now classified as constants rather than generic fields, so they participate in the constant-reader impact analysis above — change a `public static final` table, a `const string`, a Scala `object Config { val Timeout = … }`, or a Kotlin `companion object { const val … }` and the methods that read it now show up as affected. (Per-object Java `final` / C# `readonly` / Scala & Kotlin `class` instance properties are unchanged.) Kotlin constants were previously not indexed as their own symbols at all, so they now also appear in `codegraph search`.
+- Swift top-level `let`s and `static let` constants (including those namespaced in an `enum`/`struct`, the common Swift pattern) are now indexed as constants and participate in the constant-reader impact analysis above — change a `static let defaultRetryLimit` or an `enum Constants { static let … }` and the same-file code that reads it shows up as affected. Computed properties and per-instance `let`s are not treated as constants.
+- Dart top-level `const`/`final` and class `static const`/`static final` constants are now indexed as constants and participate in the constant-reader impact analysis above. Instance fields, `var`s, and locals are not treated as constants. (Generated Dart code with the standard `.g.dart`/`.freezed.dart`/`.pb.dart` suffixes is already skipped.)
 
 ### Fixes
 

+ 614 - 7
__tests__/value-reference-edges.test.ts

@@ -12,13 +12,20 @@ import * as os from 'os';
 import CodeGraph from '../src';
 
 function valueRefReaders(cg: CodeGraph, constName: string): string[] {
-  const target = cg.searchNodes(constName).map((r) => r.node).find((n) => n.name === constName);
-  if (!target) return [];
-  return cg
-    .getIncomingEdges(target.id)
-    .filter((e) => e.kind === 'references' && (e.metadata as { valueRef?: boolean } | undefined)?.valueRef)
-    .map((e) => cg.getNode(e.source)?.name)
-    .filter((n): n is string => Boolean(n));
+  // Aggregate across ALL nodes of this name — a conditionally-defined module
+  // const (`try: X=…; except: X=…`) has more than one, and the edge targets
+  // whichever one ended up in the target map.
+  const targets = cg.searchNodes(constName).map((r) => r.node).filter((n) => n.name === constName);
+  const readers = new Set<string>();
+  for (const t of targets) {
+    for (const e of cg.getIncomingEdges(t.id)) {
+      if (e.kind === 'references' && (e.metadata as { valueRef?: boolean } | undefined)?.valueRef) {
+        const r = cg.getNode(e.source)?.name;
+        if (r) readers.add(r);
+      }
+    }
+  }
+  return [...readers];
 }
 
 describe('value-reference edges', () => {
@@ -98,6 +105,606 @@ describe('value-reference edges', () => {
     expect(valueRefReaders(cg, 'Module')).toEqual([]);
   });
 
+  it('edges readers that use the const only inside JSX (.tsx)', async () => {
+    // The tsx-specific path: the const is read ONLY inside JSX expressions, so
+    // the reader-scan must descend into the JSX subtree to find it.
+    fs.writeFileSync(
+      path.join(dir, 'widget.tsx'),
+      [
+        'export const THEME_TOKENS = { color: "red", size: 12 };',
+        'export function Label() {',
+        '  return <span style={{ color: THEME_TOKENS.color }}>hi</span>;',
+        '}',
+        'export const Box = () => <div data-size={THEME_TOKENS.size} />;',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'THEME_TOKENS')).toEqual(expect.arrayContaining(['Label', 'Box']));
+  });
+
+  it('edges same-file readers to a module-level const/static (Rust)', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'lib.rs'),
+      [
+        'const MAX_RETRIES: u32 = 3;',
+        'static DEFAULT_LABEL: &str = "prod";',
+        '',
+        'fn retry() -> u32 { MAX_RETRIES }',
+        "fn label() -> &'static str { DEFAULT_LABEL }",
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'MAX_RETRIES')).toEqual(expect.arrayContaining(['retry']));
+    expect(valueRefReaders(cg, 'DEFAULT_LABEL')).toEqual(expect.arrayContaining(['label']));
+  });
+
+  it('does NOT edge a Rust const shadowed by a local let of the same name', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'shadow.rs'),
+      [
+        'const TIMEOUT: u32 = 30;',
+        '',
+        'fn uses_const() -> u32 { TIMEOUT }',
+        'fn shadows() -> u32 {',
+        '    let TIMEOUT = 5;',
+        '    TIMEOUT',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
+  it('edges same-file readers to a package-level const/var (Go)', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'main.go'),
+      [
+        'package main',
+        '',
+        'const MaxRetries = 3',
+        'var DefaultLabels = map[string]string{"env": "prod"}',
+        '',
+        'func retry() int { return MaxRetries }',
+        'func labels() map[string]string { return DefaultLabels }',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'MaxRetries')).toEqual(expect.arrayContaining(['retry']));
+    expect(valueRefReaders(cg, 'DefaultLabels')).toEqual(expect.arrayContaining(['labels']));
+  });
+
+  it('does NOT edge a Go package const shadowed by a local := of the same name', async () => {
+    // `Timeout` is a package const AND a local `:=` (short_var_declaration) in
+    // shadows(). The local read resolves to the inner binding, so a file-scope
+    // edge would be a false positive — the shadow prune drops the whole target.
+    fs.writeFileSync(
+      path.join(dir, 'shadow.go'),
+      [
+        'package main',
+        '',
+        'const Timeout = 30',
+        '',
+        'func usesConst() int { return Timeout }',
+        'func shadows() int {',
+        '\tTimeout := 5',
+        '\treturn Timeout',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'Timeout')).toEqual([]);
+  });
+
+  it('keeps a conditionally-defined module const (try/except), not a shadow (Python)', async () => {
+    // `HAS_SSL` is defined twice but BOTH at module scope (a conditional def, a
+    // very common Python idiom). It is one logical const, not a shadow, so its
+    // reader must stay edged — and the two halves must not edge each other.
+    fs.writeFileSync(
+      path.join(dir, 'cond.py'),
+      [
+        'try:',
+        '\tHAS_SSL = True',
+        'except ImportError:',
+        '\tHAS_SSL = False',
+        '',
+        'def uses_ssl():',
+        '\treturn HAS_SSL',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'HAS_SSL')).toEqual(['uses_ssl']);
+  });
+
+  it('edges readers to a top-level AND a class-internal constant (Ruby)', async () => {
+    // Ruby keeps almost all constants inside a class/module. Both the top-level
+    // `MAX_RETRIES` and the class-internal `Config::TIMEOUT` must be targets, and
+    // their same-file readers edged (TIMEOUT is read by two methods of Config).
+    fs.writeFileSync(
+      path.join(dir, 'app.rb'),
+      [
+        'MAX_RETRIES = 3',
+        '',
+        'def retry_count',
+        '  MAX_RETRIES',
+        'end',
+        '',
+        'class Config',
+        '  TIMEOUT = 30',
+        '  def self.get_timeout',
+        '    TIMEOUT',
+        '  end',
+        '  def describe',
+        '    "timeout=#{TIMEOUT}"',
+        '  end',
+        'end',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'MAX_RETRIES')).toEqual(expect.arrayContaining(['retry_count']));
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual(expect.arrayContaining(['get_timeout', 'describe']));
+  });
+
+  it('edges same-file readers to a file-scope const/table (C)', async () => {
+    // C keeps shareable values at file scope as `static const` — scalars and,
+    // very commonly, pointer/array lookup tables. Both must be extracted as
+    // nodes (the generic fallback misses C's nested init_declarator name) and
+    // their same-file readers edged.
+    fs.writeFileSync(
+      path.join(dir, 'config.c'),
+      [
+        'static const int MAX_ITEMS = 100;',
+        'static const char *const STATUS_NAMES[] = { "ok", "fail", "pending" };',
+        '',
+        'int capped(int n) { return n > MAX_ITEMS ? MAX_ITEMS : n; }',
+        'const char *label(int i) { return STATUS_NAMES[i]; }',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'MAX_ITEMS')).toEqual(expect.arrayContaining(['capped']));
+    expect(valueRefReaders(cg, 'STATUS_NAMES')).toEqual(expect.arrayContaining(['label']));
+  });
+
+  it('does NOT edge a C file const shadowed by a function-local of the same name', async () => {
+    // `TIMEOUT` is a file const AND a local `int TIMEOUT = 5` (init_declarator)
+    // in shadows(). The local read resolves to the inner binding, so a
+    // file-scope edge would be a false positive — the shadow prune drops it.
+    fs.writeFileSync(
+      path.join(dir, 'shadow.c'),
+      [
+        'static const int TIMEOUT = 30;',
+        '',
+        'int uses_const(void) { return TIMEOUT; }',
+        'int shadows(void) {',
+        '    int TIMEOUT = 5;',
+        '    return TIMEOUT;',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
+  it('does NOT mint a value target from a macro-prefixed C prototype (return-type misparse)', async () => {
+    // A prototype led by an unknown macro (`CURL_EXTERN CURLcode fn(args);`)
+    // makes tree-sitter-c misparse it as a declaration whose "variable" is the
+    // bare return-type identifier — which would mint a spurious `CURLcode`
+    // value target read by every function of that type. The bare-identifier
+    // skip prevents it, while real file-scope consts still edge their readers.
+    fs.writeFileSync(
+      path.join(dir, 'api.c'),
+      [
+        'typedef enum { CURLE_OK, CURLE_FAIL } CURLcode;',
+        'CURL_EXTERN CURLcode curl_easy_init(int x);',
+        'CURL_EXTERN CURLcode curl_easy_setopt(int y);',
+        '',
+        'static const int REAL_LIMIT = 42;',
+        'int use_real(void) { return REAL_LIMIT; }',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    // The return-type name is never extracted as a const/var, so it is not a
+    // value-ref target at all.
+    const curlcodeValues = cg
+      .searchNodes('CURLcode')
+      .map((r) => r.node)
+      .filter((n) => n.name === 'CURLcode' && (n.kind === 'constant' || n.kind === 'variable'));
+    expect(curlcodeValues).toEqual([]);
+    // Real file-scope consts alongside the misparse-prone prototypes still work.
+    expect(valueRefReaders(cg, 'REAL_LIMIT')).toEqual(expect.arrayContaining(['use_real']));
+  });
+
+  it('edges same-file methods to a class-scope static final constant (Java)', async () => {
+    // Java keeps constants as `static final` fields inside a class. They extract
+    // as `constant` kind (not `field`) so the value-ref gate targets them; a
+    // plain instance `final` field is NOT a constant and must not be a target.
+    fs.writeFileSync(
+      path.join(dir, 'Limits.java'),
+      [
+        'class Limits {',
+        '  public static final int MAX_ITEMS = 100;',
+        '  static final String[] STATUS_NAMES = { "ok", "fail" };',
+        '  final int instanceId = 1;',
+        '  int capped(int n) { return n > MAX_ITEMS ? MAX_ITEMS : n; }',
+        '  String label(int i) { return STATUS_NAMES[i]; }',
+        '  int id() { return instanceId; }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'MAX_ITEMS')).toEqual(expect.arrayContaining(['capped']));
+    expect(valueRefReaders(cg, 'STATUS_NAMES')).toEqual(expect.arrayContaining(['label']));
+    // An instance `final` field is immutable per-object state, not a shared
+    // constant — it stays `field` kind and is never a value-ref target.
+    expect(valueRefReaders(cg, 'instanceId')).toEqual([]);
+  });
+
+  it('does NOT edge a Java class const shadowed by a method-local of the same name', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'Shadow.java'),
+      [
+        'class Shadow {',
+        '  static final int TIMEOUT = 30;',
+        '  int usesConst() { return TIMEOUT; }',
+        '  int shadows() { int TIMEOUT = 5; return TIMEOUT; }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
+  it('edges same-file methods to a class const / static readonly (C#)', async () => {
+    // C# constants are `const` (compile-time) or `static readonly` (runtime);
+    // both extract as `constant`. An instance `readonly` field is per-object and
+    // stays `field`.
+    fs.writeFileSync(
+      path.join(dir, 'Limits.cs'),
+      [
+        'class Limits {',
+        '  const int MAX_ITEMS = 100;',
+        '  static readonly string[] STATUS_NAMES = { "ok", "fail" };',
+        '  readonly int instanceId = 1;',
+        '  int Capped(int n) { return n > MAX_ITEMS ? MAX_ITEMS : n; }',
+        '  string Label(int i) { return STATUS_NAMES[i]; }',
+        '  int Id() { return instanceId; }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'MAX_ITEMS')).toEqual(expect.arrayContaining(['Capped']));
+    expect(valueRefReaders(cg, 'STATUS_NAMES')).toEqual(expect.arrayContaining(['Label']));
+    expect(valueRefReaders(cg, 'instanceId')).toEqual([]);
+  });
+
+  it('does NOT edge a C# class const shadowed by a method-local of the same name', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'Shadow.cs'),
+      [
+        'class Shadow {',
+        '  const int TIMEOUT = 30;',
+        '  int UsesConst() { return TIMEOUT; }',
+        '  int Shadows() { int TIMEOUT = 5; return TIMEOUT; }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
+  it('edges same-file readers to a top-level and class const, incl. self:: / Class:: (PHP)', async () => {
+    // PHP keeps constants at file scope (`const X`) and inside classes (`const
+    // X`), both extracted as `constant`. A constant *reference* is a `name` node
+    // (bare `X`, or the const half of `self::X` / `Foo::X`), so the reader-scan
+    // must match `name`. A `$var` local is a different namespace and can never
+    // shadow a bare constant — so there is nothing to prune.
+    fs.writeFileSync(
+      path.join(dir, 'Config.php'),
+      [
+        '<?php',
+        'const APP_VERSION = "1.0";',
+        'class Config {',
+        '  const MAX_ITEMS = 100;',
+        '  const STATUS_NAMES = ["ok", "fail"];',
+        '  public static $counter = 0;',
+        '  function capped($n) { return $n > self::MAX_ITEMS ? self::MAX_ITEMS : $n; }',
+        '  function label($i) { return Config::STATUS_NAMES[$i]; }',
+        '  function version() { return APP_VERSION; }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'MAX_ITEMS')).toEqual(expect.arrayContaining(['capped']));
+    expect(valueRefReaders(cg, 'STATUS_NAMES')).toEqual(expect.arrayContaining(['label']));
+    expect(valueRefReaders(cg, 'APP_VERSION')).toEqual(expect.arrayContaining(['version']));
+    // A static property is mutable class state, not a constant — never a target.
+    expect(valueRefReaders(cg, 'counter')).toEqual([]);
+  });
+
+  it('edges readers to a top-level and object-scope val, not a class instance val (Scala)', async () => {
+    // Scala has no `static`: an `object` is a singleton, so its `val`s are the
+    // shared-constant idiom (extracted as `constant`, like a top-level val). A
+    // `class` val is a per-instance immutable field (`field`, never a target).
+    fs.writeFileSync(
+      path.join(dir, 'Demo.scala'),
+      [
+        'val AppVersion = "1.0"',
+        'object Config {',
+        '  val TIMEOUT_MS = 30',
+        '  val STATUS_NAMES = List("ok", "fail")',
+        '  def capped(n: Int): Int = if (n > TIMEOUT_MS) TIMEOUT_MS else n',
+        '  def label(i: Int): String = STATUS_NAMES(i)',
+        '}',
+        'class Widget {',
+        '  val MaxItems = 100',
+        '  def within(n: Int): Int = if (n < MaxItems) n else MaxItems',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT_MS')).toEqual(expect.arrayContaining(['capped']));
+    expect(valueRefReaders(cg, 'STATUS_NAMES')).toEqual(expect.arrayContaining(['label']));
+    // A class instance `val` is per-object state (kind `field`), not a shared
+    // constant — never a value-ref target even though `within` reads it.
+    expect(valueRefReaders(cg, 'MaxItems')).toEqual([]);
+  });
+
+  it('does NOT edge a Scala object val shadowed by a method-local val of the same name', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'Shadow.scala'),
+      [
+        'object Config {',
+        '  val TIMEOUT = 30',
+        '  def usesConst(): Int = TIMEOUT',
+        '  def shadows(): Int = { val TIMEOUT = 5; TIMEOUT }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
+  it('edges readers to top-level, object, and companion-object constants, not a class val (Kotlin)', async () => {
+    // Kotlin has no `static`: a top-level property, an `object` (singleton), and a
+    // class's `companion object` all hold shared constants (`val`→constant). A
+    // class instance `val` is per-object state (`field`, never a target). The
+    // property name nests as variable_declaration→simple_identifier, and a const
+    // reference is a `simple_identifier`.
+    fs.writeFileSync(
+      path.join(dir, 'Demo.kt'),
+      [
+        'const val TOP_LEVEL_MAX = 100',
+        'object Config {',
+        '  const val TIMEOUT_MS = 30',
+        '  val STATUS_NAMES = listOf("ok", "fail")',
+        '  fun capped(n: Int): Int = if (n > TIMEOUT_MS) TIMEOUT_MS else n',
+        '  fun label(i: Int): String = STATUS_NAMES[i]',
+        '}',
+        'class Widget {',
+        '  companion object { const val MAX_RETRIES = 3 }',
+        '  val instanceField = 1',
+        '  fun retries(): Int = MAX_RETRIES',
+        '  fun within(n: Int): Int = if (n < TOP_LEVEL_MAX) n else TOP_LEVEL_MAX',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'STATUS_NAMES')).toEqual(expect.arrayContaining(['label']));
+    expect(valueRefReaders(cg, 'MAX_RETRIES')).toEqual(expect.arrayContaining(['retries']));
+    expect(valueRefReaders(cg, 'TOP_LEVEL_MAX')).toEqual(expect.arrayContaining(['within']));
+    // A class instance `val` is per-object state (kind `field`), never a target.
+    expect(valueRefReaders(cg, 'instanceField')).toEqual([]);
+  });
+
+  it('does NOT edge a Kotlin object const shadowed by a method-local val of the same name', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'Shadow.kt'),
+      [
+        'object Config {',
+        '  const val TIMEOUT = 30',
+        '  fun usesConst(): Int = TIMEOUT',
+        '  fun shadows(): Int { val TIMEOUT = 5; return TIMEOUT }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
+  it('edges readers to a top-level let and static let in enum/struct, not an instance let (Swift)', async () => {
+    // Swift has no `static` keyword for globals; the shared-constant idiom is a
+    // top-level `let` or a `static let` inside a type — Swift namespaces these in
+    // `enum`/`struct`. Those extract as `constant`; an instance stored `let` is
+    // per-object (`field`, never a target); a *computed* property is skipped.
+    fs.writeFileSync(
+      path.join(dir, 'Demo.swift'),
+      [
+        'let topLevelMax = 100',
+        'enum Constants {',
+        '  static let TIMEOUT_MS = 30',
+        '  static let STATUS_NAMES = ["ok", "fail"]',
+        '}',
+        'struct Widget {',
+        '  static let MAX_RETRIES = 3',
+        '  let instanceField = 1',
+        '  func retries() -> Int { return Widget.MAX_RETRIES }',
+        '  func within(_ n: Int) -> Int { return n < topLevelMax ? n : topLevelMax }',
+        '}',
+        'func labels(_ i: Int) -> String { return Constants.STATUS_NAMES[i] }',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'STATUS_NAMES')).toEqual(expect.arrayContaining(['labels']));
+    expect(valueRefReaders(cg, 'MAX_RETRIES')).toEqual(expect.arrayContaining(['retries']));
+    expect(valueRefReaders(cg, 'topLevelMax')).toEqual(expect.arrayContaining(['within']));
+    // An instance `let` is per-object state (kind `field`), never a target.
+    expect(valueRefReaders(cg, 'instanceField')).toEqual([]);
+  });
+
+  it('does NOT edge a Swift static const shadowed by a function-local let of the same name', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'Shadow.swift'),
+      [
+        'enum Config {',
+        '  static let TIMEOUT = 30',
+        '  static func usesConst() -> Int { return TIMEOUT }',
+        '  static func shadows() -> Int { let TIMEOUT = 5; return TIMEOUT }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
+  it('edges readers to a top-level const and a class static const/final (Dart)', async () => {
+    // Dart's grammar uses `static_final_declaration` for exactly the top-level
+    // `const`/`final` and class `static const`/`static final` — the shared
+    // constants — so those extract as `constant`. Instance fields and `var`
+    // (`initialized_identifier`) and locals (`initialized_variable_definition`)
+    // are NOT this node, so they never become targets. Dart attaches a method
+    // body as a sibling of the signature, so the reader-scan pulls that in.
+    fs.writeFileSync(
+      path.join(dir, 'demo.dart'),
+      [
+        'const TOP_LEVEL_MAX = 100;',
+        'class Config {',
+        '  static const TIMEOUT_MS = 30;',
+        '  static final STATUS_NAMES = ["ok", "fail"];',
+        '  final int instanceField = 1;',
+        '  int capped(int n) => n > TIMEOUT_MS ? TIMEOUT_MS : n;',
+        '  String label(int i) { return STATUS_NAMES[i]; }',
+        '  int withinLimit(int n) => n < TOP_LEVEL_MAX ? n : TOP_LEVEL_MAX;',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT_MS')).toEqual(expect.arrayContaining(['capped']));
+    expect(valueRefReaders(cg, 'STATUS_NAMES')).toEqual(expect.arrayContaining(['label']));
+    expect(valueRefReaders(cg, 'TOP_LEVEL_MAX')).toEqual(expect.arrayContaining(['withinLimit']));
+    // An instance field is per-object state, never a value-ref target.
+    expect(valueRefReaders(cg, 'instanceField')).toEqual([]);
+  });
+
+  it('does NOT edge a Dart const shadowed by a method-local const of the same name', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'shadow.dart'),
+      [
+        'const TIMEOUT = 30;',
+        'class C {',
+        '  int usesConst() => TIMEOUT;',
+        '  int shadows() { const TIMEOUT = 5; return TIMEOUT; }',
+        '}',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
+  it('edges same-file functions to a unit-scope const (Pascal)', async () => {
+    // Pascal keeps shareable constants in a `const` section at unit (file) scope
+    // (and class scope). They already extract as `constant`. A const reference is
+    // an `identifier`; the catch is that Pascal attaches a proc body (`block`) as
+    // a sibling of the proc header (`declProc`, the reader scope), so the
+    // reader-scan pulls in that sibling.
+    fs.writeFileSync(
+      path.join(dir, 'demo.pas'),
+      [
+        'unit Demo;',
+        'interface',
+        'const',
+        '  MAX_ITEMS = 100;',
+        "  APP_NAME = 'MyApp';",
+        'implementation',
+        'function Capped(n: Integer): Integer;',
+        'begin',
+        '  if n > MAX_ITEMS then Capped := MAX_ITEMS else Capped := n;',
+        'end;',
+        'function AppLabel: string;',
+        'begin',
+        '  AppLabel := APP_NAME;',
+        'end;',
+        'end.',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'MAX_ITEMS')).toEqual(expect.arrayContaining(['Capped']));
+    expect(valueRefReaders(cg, 'APP_NAME')).toEqual(expect.arrayContaining(['AppLabel']));
+  });
+
+  it('does NOT edge a Pascal unit const shadowed by a function-local const of the same name', async () => {
+    fs.writeFileSync(
+      path.join(dir, 'shadow.pas'),
+      [
+        'unit Shadow;',
+        'interface',
+        'const',
+        '  TIMEOUT = 30;',
+        'implementation',
+        'function UsesConst: Integer;',
+        'begin',
+        '  UsesConst := TIMEOUT;',
+        'end;',
+        'function Shadows: Integer;',
+        'const TIMEOUT = 5;',
+        'begin',
+        '  Shadows := TIMEOUT;',
+        'end;',
+        'end.',
+      ].join('\n'),
+    );
+    cg = index();
+    await cg.indexAll();
+
+    expect(valueRefReaders(cg, 'TIMEOUT')).toEqual([]);
+  });
+
   it('emits nothing when CODEGRAPH_VALUE_REFS=0', async () => {
     const prev = process.env.CODEGRAPH_VALUE_REFS;
     process.env.CODEGRAPH_VALUE_REFS = '0';

+ 544 - 0
docs/design/value-reference-edges-playbook.md

@@ -0,0 +1,544 @@
+# Playbook: extend value-reference edges to a new language
+
+**Purpose.** This is the operational runbook for adding + validating value-reference-edge
+coverage for one more language. Point a fresh session at this file and say **"Start on
+language X"** — it has everything: how the feature works, where the code is, the exact
+validation recipe (with scripts), the per-language checklist, and the traps already hit.
+
+Design rationale + the validation matrix already done live in the companion doc:
+[`value-reference-edges.md`](./value-reference-edges.md). This file is the *how-to*.
+
+---
+
+## 0. "Start on language X" — do this in order
+
+1. Read §1 (how it works) and §2 (current state) so you know the mechanism and what's done.
+2. Do the **per-language wiring check** (§5 step A–C) — this is where languages differ and
+   where most of the real work/decisions are. Do NOT skip: a wrong declarator node type or a
+   class-scope-vs-file-scope mismatch makes the feature silently emit nothing (or wrong edges).
+3. Run the **validation sweep** (§4) on small/medium/large **public OSS** repos for that
+   language. Hunt FPs. **Fix FP clusters; record singletons.** (See §3 for what a real FP
+   looks like vs an acceptable one.)
+4. Add a **row to the matrix** in `value-reference-edges.md` and a **test case** in
+   `__tests__/value-reference-edges.test.ts`.
+5. Commit on a branch, open a PR. (§6 has the git workflow + how the prior PRs were done.)
+
+Scope rule (hard): **never eval on the maintainer's own repos** — clone a real public OSS
+repo for the language. (Memory: `agent-eval-targets-public-oss-only`.)
+
+---
+
+## 1. How value-reference edges work
+
+**What:** a `references` edge with `metadata: { valueRef: true }` from a *reader symbol* to
+the **file-scope (or class-/module-scope) `const`/`var` it reads**, same-file only. It exists so impact analysis
+catches "change this constant / config object / lookup table → affect its readers" — a class
+of change calls/imports/inheritance edges never captured (a const's consumers used to look
+like "nothing depends on this").
+
+**Where it flows:** straight into `getImpactRadius` → `codegraph impact` and the impact trail
+in `codegraph_explore` / `codegraph_node`. No agent-behaviour change required. **The win is
+impact-radius correctness** (a const 90 symbols read going from "1 affected" to "90"), *not*
+agent read-reduction (see §4.3).
+
+**Code — all in `src/extraction/tree-sitter.ts`:**
+
+| Symbol | Role |
+|---|---|
+| `VALUE_REF_LANGS` (static Set) | languages the feature runs for. Currently `typescript`, `javascript`, `tsx`, `go`, `python`, `rust`, `ruby`, `c`, `java`, `csharp`, `php`, `scala`, `kotlin`, `swift`, `dart`, `pascal`. **Add the new language here.** |
+| `valueRefsEnabled` | `process.env.CODEGRAPH_VALUE_REFS !== '0'` — default ON, env opts out. |
+| `MAX_VALUE_REF_NODES` (20_000) | per-scope traversal cap (and the shadow-scan cap). |
+| `captureValueRefScope(kind, name, id, node)` | called from `createNode` on every node. Records **targets** (file-scope `const`/`var`) and **reader scopes** (`function`/`method`/`const`/`var`). |
+| `flushValueRefs()` | called once at end of `extract()`. Prunes shadowed targets, then for each reader scope walks its subtree for identifiers matching a target name and emits the edges. |
+
+**The two gates inside `captureValueRefScope`** (what you may need to adjust per language):
+
+- **Target gate:** `kind ∈ {constant, variable}` **and** `name.length >= 3` **and**
+  `/[A-Z_]/.test(name)` (distinctive name — dodges single-letter / all-lowercase shadowing)
+  **and** the node's parent id starts with `file:`, `class:`, `module:`, `struct:`, or `enum:`
+  (file/class/module scope, plus the `struct:`/`enum:` parents added for Swift).
+- **Reader gate:** `kind ∈ {function, method, constant, variable}`.
+
+**The emit loop in `flushValueRefs`:** same-file only (targets + scopes are per-file, reset
+each flush); deduped per `(reader, target)`; skips `isGeneratedFile(path)`; **prunes shadowed
+targets** (see §3).
+
+---
+
+## 2. Current state (what's shipped + validated)
+
+- **Default ON** for TS/JS/tsx + Go + Python + Rust + Ruby + C + Java + C# + PHP + Scala + Kotlin + Swift + Dart + Pascal (`CODEGRAPH_VALUE_REFS=0` disables). Shipped in **PR #895**
+  (flip-on + the shadow prune); Go added in a later PR (the shadow-prune declarator switch +
+  `VALUE_REF_LANGS`); C added later still (extractor change to emit the nodes + the bare-identifier
+  misparse guard); Java + C# after that (field→constant kind switch for the const subset).
+- **Validated S/M/L** in **TS, JS, tsx, Go, Python, Rust, Ruby, C, Java, C#, PHP, Scala, Kotlin, Swift, Dart, and Pascal** — see the matrix in the
+  design doc. All clean: node count identical on/off, precision guards held, impact win
+  reproduced. Go required extending the shadow prune (per-grammar declarators) — the worked
+  example of "step B is load-bearing." **C required the Ruby treatment** (the extractor didn't emit
+  C file-scope const/var nodes at all) **plus** a C-specific FP guard (a macro-prefixed-prototype
+  misparse mints a bare-identifier "variable" named after the return type — skip bare-`identifier`
+  declarators). It was the worked example of "the §2b coverage table's *easy-path* guess can be
+  wrong — always do §5 step C (confirm the nodes exist) before trusting it."
+- **Java + C# were the cleanest class-scope ("Ruby treatment") languages.** The constants already
+  extract — but as `field` kind, which the gate rejects. The whole change was emitting the const
+  *subset* as `constant`: an `isConst` predicate on each extractor (Java `static final`; C# `const`
+  / `static readonly`) + a kind switch in `extractField`. **No new shadow-prune wiring** (method
+  locals are `variable_declarator`, already in the switch) and **no FP guards** (UPPER_SNAKE /
+  PascalCase fit the distinctive-name gate). Instance `final`/`readonly` fields correctly stay
+  `field`. Validated S/M/L: gson/commons-lang/guava, automapper/newtonsoft/efcore — 0 leaks, node
+  parity, big impact wins (`INDEX_NOT_FOUND` 4→165, `_resourceManager` 22→1664).
+- **PHP was the cleanest of all — one reader-scan line.** Constants already extract as `constant`
+  (top-level + class), so the only change was teaching the reader-scan that a PHP constant
+  *reference* is a `name` node (bare `X`, or the const half of `self::X` / `Foo::X`). **No extractor
+  change, no prune wiring** (a `$var` local can't shadow a bare constant — different namespace).
+  Validated S/M/L (guzzle/monolog/laravel), all clean, 0 class/const collisions. The honest caveat:
+  **lower yield** — PHP reads constants cross-file far more than same-file (laravel 2,956 files → 86
+  edges), and value-refs is same-file only; still correct, just a smaller contribution.
+- **Scala — an `object` is the constant scope.** Scala has no `static`; a singleton `object`'s `val`s
+  are the shared-constant idiom (`object Config { val Timeout = 30 }`). Top-level `val` already
+  extracted as `constant`, but object/class vals both came out as `field`. The fix: in the Scala
+  `val_definition` handler, walk to the enclosing definition — `object_definition` (or top-level) →
+  `constant`/`variable`; `class`/`trait`/`enum` → `field` (per-instance, like Java instance `final`).
+  Added `val_definition`/`var_definition` to the shadow prune (method-local `val` shadows). Reader-scan
+  needed nothing (refs are `identifier`). Minor known limitation: Scala uses `val`/`def`
+  interchangeably for members, so a camelCase val can share a name with a method — same-file name
+  matching can't tell them apart (bounded, like Ruby's sibling-class; sweep showed flagged collisions
+  were mostly real object vals read by siblings). Validated S/M/L (upickle/cats/pekko).
+- **C++ was attempted and reverted — DON'T retry without solving parse fidelity first.** tree-sitter-cpp
+  mis-parses real template/macro-heavy C++ (and `.h` files route to the C grammar): class members and
+  parameters leak to file scope as bogus constants/variables. Two guards (skip `ERROR`-ancestor and
+  `compound_statement`-ancestor declarations) removed ~83% of gross leaks, but the residual pervades
+  even well-structured library source (template-class member leaks, amalgamated mega-headers,
+  `.h`-as-C++). It did not reach the precision bar of the other languages. See the C++ section below.
+- **Kotlin = C + Scala + PHP techniques combined (and clean).** Nothing extracted before (property name
+  nests `property_declaration → variable_declaration → simple_identifier` — the C problem). Fix:
+  handle `property_declaration` in the Kotlin `visitNode` hook — pull the nested name, walk to the
+  enclosing definition for the kind (`object`/`companion object`/top-level → `constant`/`variable`;
+  `class` → `field` — the Scala rule; skip locals under a `function_body`/`init`/lambda), add
+  `simple_identifier` to the reader-scan (the PHP-`name` move), and `property_declaration` to the
+  shadow prune. Clean parse fidelity (the one `fun interface` misparse is already handled), so no
+  C++-style tail. One of the cleanest yields — companion-object bit-masks/state consts are a heavy
+  same-file-read idiom. Validated S/M/L (okio/coroutines/ktor); only the bounded val/def-or-class and
+  sibling-companion name overlaps remain (shared with Scala/Ruby).
+- **Swift reused Kotlin + two Swift-specific touches.** Top-level `let` + `static let` in a type are
+  the shared constants (`enum`/`struct` namespace them); instance `let` stays `field`. Nested name
+  (`property_declaration → <name> pattern → simple_identifier`); reader-scan already covered
+  (`simple_identifier`, from Kotlin). Two new things: **(1) the target gate was widened to `struct:`/
+  `enum:` parents** — Swift namespaces constants there (`enum Constants { static let X }`), and every
+  other language's targets are `file:`/`class:`/`module:`; **(2) computed properties are skipped** (a
+  `var x:Int{ … }` getter has no stored value — detect the `computed_property` child). Node creation
+  slots into the *existing* Swift `property_declaration` handler (property-wrapper/type deps), leaving
+  that untouched. Clean parse, no tail. Validated S/M/L (Alamofire/swift-argument-parser/swift-nio).
+- **Dart — clean grammar separation, but a sibling-body reader-scan fix.** Dart's grammar already
+  splits the cases: **`static_final_declaration`** is *exactly* a top-level/`static` `const`/`final`
+  (the shared-constant idiom), while instance fields/`var` use `initialized_identifier` and locals use
+  `initialized_variable_definition` — so extracting `static_final_declaration` → `constant` (in a
+  `visitNode` hook) has **no instance/local leaks to guard**. Reader-scan free (Dart refs are
+  `identifier`). The catch was the **reader-scan**: Dart attaches a method/function `body` as a *next
+  sibling* of the signature node (the stored scope), not a child, so the scan saw only the signature
+  and **found nothing** until it was taught to pull in a `function_body` next-sibling (Dart-only among
+  the value-ref set). Shadow prune needed `static_final_declaration` + `initialized_identifier` +
+  `initialized_variable_definition` (a local `const X` shadowing a file `const X`). Validated S/M/L
+  (http/flame/flutter-packages). **Caveat:** generated Dart files inflate the sibling-class ambiguity
+  (a JNIGEN `_bindings.dart` with hundreds of `static final _class` collapses to the file-wide target).
+  The common codegen suffixes (`.g.dart`/`.freezed.dart`/`.pb.dart`) are already filtered by
+  `isGeneratedFile`; header-only-marked generators (JNIGEN) are not, so real source is clean but
+  generated FFI/JNI bindings are noisy.
+- **Pascal — the genuine easy path + the Dart sibling-body fix again.** Unit/class `const` *already*
+  extracted as `constant` (`variableTypes: ['declConst', …]`), so it was add-to-`VALUE_REF_LANGS` +
+  the shadow prune (`declConst`/`declVar`; a local `const X` shadows a unit `const X`). The catch was
+  the *same* reader-scan bug as Dart: Pascal's proc body is a **`block` sibling** of the `declProc`
+  header (the reader scope), both under a `defProc` — so the same sibling-pull fix was extended to
+  `block`. Reader-scan node type already covered (refs are `identifier`). **Low yield** — Pascal reads
+  constants cross-unit more than same-file (horse: 4 edges). **Caveat:** Pascal is case-insensitive,
+  but the reader-scan matches exact text, so a differently-cased reference is missed (no FP, just a
+  miss); not worth normalizing.
+- **Tests:** `__tests__/value-reference-edges.test.ts` — same-file readers edged; surfaced in
+  impact radius; shadowed const NOT edged (verified to fail without the guard); JSX-only read
+  edged (tsx); `CODEGRAPH_VALUE_REFS=0` emits nothing.
+- **Memory:** `value-reference-edges-default-on` (the A/B finding + shadow guard rationale).
+
+---
+
+## 2b. Coverage vs the README (languages + frameworks)
+
+Tracked against the README's **Supported Languages** table (24 rows) and **Framework-aware
+Routes** list. Value-refs is **language-level**, so frameworks are *not* a separate axis (see
+the bottom of this section).
+
+**✅ Done — validated S/M/L (15 + 3 inherited):**
+
+| Language | How |
+|---|---|
+| TypeScript, JavaScript, tsx | file-scope `const`/`var`; the original languages |
+| Python | module-level `NAME =` |
+| Go | package `const`/`var` |
+| Rust | module + impl `const`/`static` |
+| Ruby | class/module `CONST` (the class-scope extension) |
+| C | file-scope `static const` scalars + pointer/array lookup tables + mutable globals. **Needed an extractor change** (nodes weren't emitted) + a bare-identifier misparse guard — NOT the easy path the table below first guessed |
+| Java | class `static final` fields. Nodes existed as `field` kind; emitted the const subset as `constant` (`isConst` + `extractField` kind switch). No new prune wiring, no FP guards |
+| C# | class `const` / `static readonly`. Identical to Java — same `field`→`constant` change |
+| PHP | top-level `const` + class `const` (both already `constant` kind). **Only** change was the reader-scan: a PHP const *reference* is a `name` node. No extractor change, no prune wiring (a `$var` local can't shadow a bare constant). Lower yield — PHP reads consts cross-file more than same-file |
+| Scala | top-level `val` (already `constant`) + **`object` val** (the singleton-constant idiom; re-kinded from `field` by walking to the enclosing `object_definition`). `class`/`trait`/`enum` vals stay `field`. `val_definition`/`var_definition` added to the shadow prune. Minor val/def name-collision limit |
+| Kotlin | top-level / `object` / `companion object` `val` (re-kinded from nothing — properties weren't extracted at all). Handled in `visitNode`: nested name (`variable_declaration → simple_identifier`, the C move) + scope-walk for kind (Scala move) + `simple_identifier` in the reader-scan (PHP move) + prune. `class` instance vals stay `field`. Clean — one of the best yields (companion bit-masks) |
+| Swift | top-level `let` + `static let` in `struct`/`enum`/`class`. Reused Kotlin (nested name + `simple_identifier` reader-scan). Two Swift touches: **gate widened to `struct:`/`enum:` parents** (Swift namespaces consts there), and **computed properties skipped**. `class`/instance stored props stay `field`. Slots into the existing Swift property-wrapper handler |
+| Dart | top-level `const`/`final` + class `static const`/`static final` — all the **`static_final_declaration`** node, cleanly separated by the grammar from instance/`var`/local (so no leak guard). `visitNode` → `constant`. Needed a reader-scan fix: Dart's method **body is a next sibling** of the signature, so the scan pulls in a `function_body` sibling. Generated-FFI noise (JNIGEN `_bindings.dart`) is the one caveat |
+| Pascal / Delphi | unit/class `const` (already extracted as `constant`). Add-to-`VALUE_REF_LANGS` + shadow prune (`declConst`/`declVar`) + the **same Dart sibling-body fix** (Pascal's proc body is a `block` sibling of the `declProc` header). Low yield (cross-unit reads); case-insensitive (exact-text scan misses re-cased refs) |
+| **Svelte, Vue, Astro** | **inherited for free** — their extractors re-parse the `<script>`/frontmatter block as `typescript`/`javascript`, which are in `VALUE_REF_LANGS` (verified: a `.svelte` `const` edges its readers). No separate work; no separate matrix row needed. |
+
+**🔜 Remaining — likely the easy path** (constants are file/module-scope, or top-level; do §5: add
+to `VALUE_REF_LANGS`, verify the declarator node type + extractor kind, sweep). Classify each
+*before* building — several are mixed file+class scope. **Caveat learned from C:** "easy path" here
+means *scope* fits — it does NOT promise the extractor already emits the const nodes. C was in this
+column but emitted *no* file-scope const/var nodes (its name nests in an `init_declarator` the
+generic fallback can't read), so it needed the Ruby-style extractor change after all. **Always run
+§5 step C (confirm `select kind,name from nodes …` actually shows the consts) before trusting this
+column.**
+
+| Language | Constant forms | Note |
+|---|---|---|
+| Lua / Luau | file/chunk `local X =` + globals; no `const` keyword | distinctive-name gate (needs `[A-Z_]`) catches fewer — Lua casing varies |
+| R | file-scope `X <- …` / `X = …` | |
+
+**🧱 Remaining — needs the Ruby treatment** (constants live almost entirely **inside a
+class/type**; the class-scope *gate* exists now, but first confirm the extractor emits them as
+`constant`/`variable` nodes — Ruby's weren't extracted at all, and class fields often come out as
+`field`/`property` kind, which the gate rejects). **Java + C# (done) were this case**: their
+constants extracted as `field` kind, and the fix was emitting the const subset (`static final` /
+`const` / `static readonly`) as `constant` — the template for the rest of this bucket:
+
+| Language | Constant forms |
+|---|---|
+| Objective-C | `static const` / `extern const` / `#define` (file-ish; macros unparsed; already "partial support") |
+
+**⛔ Attempted & reverted — C++.** file-scope + class `static const`/`constexpr` (mixed). Machinery
+built and correct on clean C++, but **tree-sitter-cpp parse fidelity is the blocker**: template/
+macro-heavy real C++ leaks class members + parameters to file scope as bogus constants/variables, and
+`.h` files route to the C grammar (mangling C++ classes). Two guards (skip `ERROR`-ancestor and
+`compound_statement`-ancestor declarations) cut ~83% of gross leaks but the residual pervades even
+well-structured library source. **Did not meet the precision bar; reverted.** Don't retry as a
+"value-refs" task — it needs prior work on C++ parse handling (template-class member scoping,
+`.h`-as-C++ detection, amalgamated-header exclusion).
+
+**🚫 N/A:** Liquid (template language — no value constants to track).
+
+**Frameworks — not a value-refs axis.** The README's framework list (Django, Flask, Express,
+NestJS, Rails, Spring, Gin, Laravel, …) is a *separate* feature: **route-node extraction**.
+Value-refs is framework-agnostic — it covers constants in any framework's code through the
+underlying language support, with **nothing to do per framework**. The validation sweeps already
+ran on framework repos (Rails → Ruby, Django → Python, gin → Go, express/eslint/webpack → JS,
+jekyll/sinatra → Ruby), so framework code is exercised; there's no separate framework matrix.
+
+---
+
+## 3. Precision guards + what counts as a false positive
+
+Guards run in `flushValueRefs`, in order:
+
+1. **`isGeneratedFile(path)`** (`src/extraction/generated-detection.ts`) — skips
+   *suffix-recognised* generated files (`.pb.ts`, `.min.js`, …). **Path-only** — cannot catch
+   content-minified bundles.
+2. **Shadow prune** — drop a target when its **declarator count exceeds its file-scope node
+   count** (so it's also bound in an inner/local scope). Rationale: a bundled/Emscripten `const
+   Module` re-declared as an inner `var Module`, a Go package const shadowed by a local `:=`, or
+   a Python module const shadowed by a local `=` resolves to the *inner* binding for nested
+   readers, so a file-scope edge is wrong. Inner re-bindings aren't graph nodes, so declarators
+   are counted at the **syntax-tree** level. *This is the per-language-sensitive guard:* the
+   declarator node types differ per grammar (§5 step B), and comparing against file-scope node
+   count (not a flat `>1`) is what keeps **conditional module defs** (`try: X=…; except: X=…`)
+   from being pruned as if they were shadowed.
+3. **Distinctive-name + same-file** (the target gate).
+
+**What a real FP looks like** (fix it): a reader edged to a file-scope const it does **not**
+actually read — almost always **intra-file shadowing** (the name is re-bound in an inner
+scope) concentrated in **bundled/minified/generated** files. On excalidraw this was 23 edges
+in one Emscripten blob.
+
+**What is NOT an FP** (leave it):
+- **CommonJS `var x = require('…')` bindings** (JS) — correct same-file reads; changing the
+  binding *does* affect its readers; dedups against `calls` edges in impact. Not noise.
+- **Module-level mutable `var` state** read by many same-file functions — the intended case.
+- A higher edge share in a language (JS ~4–5% vs TS ~0.7–1.6%) is fine if precision holds.
+
+**Known limitations (intentional, documented):** parameter-only shadowing is *not* guarded
+(the prune counts declarators, not params — guarding it would over-prune legit consts whose
+name coincides with a param); same-file only (no cross-file consumers); reactive/computed
+reads with no static identifier aren't covered.
+
+---
+
+## 4. Validation recipe
+
+### 4.1 Deterministic probe (the core — finds FPs)
+
+Index the same repo twice (on vs `CODEGRAPH_VALUE_REFS=0`); node count **must be identical**
+(edges-only feature). Build first: `npm run build`. Save this as `probe.sh`:
+
+```bash
+#!/usr/bin/env bash
+set -uo pipefail
+SRC="$1"; NAME="$2"; WORK="${WORK:-/tmp/cg-vr}"
+CG="$(pwd)/dist/bin/codegraph.js"
+export CODEGRAPH_TELEMETRY=0 DO_NOT_TRACK=1 CODEGRAPH_NO_DAEMON=1
+ON="$WORK/$NAME-on"; OFF="$WORK/$NAME-off"
+rm -rf "$ON" "$OFF"; mkdir -p "$WORK"
+rsync -a --exclude='.git' "$SRC/" "$ON/"; rsync -a --exclude='.git' "$SRC/" "$OFF/"
+node "$CG" init "$ON"  2>&1 | grep -E "nodes,|Indexed"
+CODEGRAPH_VALUE_REFS=0 node "$CG" init "$OFF" 2>&1 | grep -E "nodes,|Indexed"
+OND="$ON/.codegraph/codegraph.db"; OFD="$OFF/.codegraph/codegraph.db"
+echo "nodes on/off: $(sqlite3 "$OND" 'select count(*) from nodes') / $(sqlite3 "$OFD" 'select count(*) from nodes')  (MUST MATCH)"
+# PRECISE filter — do NOT use LIKE '%valueRef%' (it matches filenames like
+# textModelValueReference.ts; see §7). Always: kind='references' AND the exact key.
+F="kind='references' and metadata like '%\"valueRef\":true%'"
+echo "value-ref edges: $(sqlite3 "$OND" "select count(*) from edges where $F")"
+echo "=== top targets by same-file reader count ==="
+sqlite3 -column "$OND" "select t.name, count(*) r, replace(t.file_path,'$ON/','') f from edges e join nodes t on e.target=t.id where e.$F group by e.target order by r desc limit 15;"
+```
+
+Run: `WORK=/tmp/cg-vr bash probe.sh /path/to/cloned-repo reponame`.
+
+### 4.2 FP hunts (run against the ON db `$OND`, with `F` from above)
+
+```bash
+# (a) bundled/minified files among targets — the #1 FP source (the woff2 case):
+sqlite3 "$OND" "select distinct t.file_path from edges e join nodes t on e.target=t.id where e.$F;" \
+ | while read -r f; do [ -f "$f" ] || continue; \
+     m=$(awk '{if(length>x)x=length}END{print x+0}' "$f"); [ "$m" -gt 300 ] && echo "MINIFIED? $m $f"; done
+# (b) guard invariant — no surviving target re-declared in its file (adjust regex per language):
+sqlite3 "$OND" "select distinct t.name, t.file_path from edges e join nodes t on e.target=t.id where e.$F limit 80;" \
+ | while IFS='|' read -r n f; do [ -f "$f" ] || continue; \
+     c=$(grep -cE "(const|let|var)[[:space:]]+$n\b" "$f"); [ "${c:-0}" -gt 1 ] && echo "LEAK $n x$c $f"; done
+# (c) precision sample — eyeball reader->target pairs across the tree:
+sqlite3 -column "$OND" "select s.name,'->',t.name from edges e join nodes s on e.source=s.id join nodes t on e.target=t.id where e.$F order by e.id desc limit 12;"
+```
+
+For each FP suspect, open the file and confirm whether the reader truly reads that file-scope
+target. Cluster of FPs in one file → fix (extend a guard). One-off → record it, don't chase.
+
+### 4.3 Impact-API delta (the headline) + agent A/B
+
+Headline metric — value-refs turns a blind impact into a real one:
+
+```bash
+for s in SOME_CONST ANOTHER_CONST; do
+  printf "%-20s ON %s OFF %s\n" "$s" \
+    "$(node dist/bin/codegraph.js impact "$s" --path "$ON"  2>/dev/null | grep -oE '— [0-9]+ affected' | head -1)" \
+    "$(node dist/bin/codegraph.js impact "$s" --path "$OFF" 2>/dev/null | grep -oE '— [0-9]+ affected' | head -1)"
+done
+```
+Pick targets from the probe's "top targets" list. Expect ON ≫ OFF (e.g. 1 → 90).
+
+**Agent A/B** (optional per language — the finding below is size/language-independent, so the
+deterministic probe + impact delta usually suffice). If you run it: two **fresh on/off
+indexes**, pre-warm a `--no-watch` daemon per index, `claude -p` with **`--model sonnet
+--effort high`**, ≥2 runs/arm. The pattern in `scripts/agent-eval/ab-new-vs-baseline.sh` is
+the template **but it switches builds + re-indexes (no flag), which wipes a flag-specific
+index — don't use it as-is for a flag A/B.** (Memories: `agent-eval-nested-attach`,
+`agent-eval-targets-public-oss-only`.)
+
+**The established A/B finding (don't re-derive):** across 12 runs on excalidraw both arms did
+0 Read / 0 Grep — the agent answers impact questions in one call and reaches for
+`codegraph_search`/`callers`, *not* `impact`/`explore`, so it often doesn't query the
+value-ref edges at all. ON was never worse than OFF. **So: value-refs does NOT reduce agent
+reads — the win is blast-radius correctness** (impact API / CodeGraph Pro's verdict engine).
+
+---
+
+## 5. Per-language checklist (the actual work)
+
+### A. Where do "constants worth tracking" live? (decide FIRST)
+
+The target gate now accepts **`file:`, `class:`, `module:`, `struct:`, and `enum:`** parents (the last two were added for Swift). Before anything:
+
+- If the language puts shareable constants at **file/module scope** (TS/JS, Python module
+  consts, Go package vars, Rust module/impl `const`/`static`) → fits as-is; proceed.
+- If constants live **inside a class/module** (Ruby — done) → the `class:`/`module:` gate now
+  covers them, BUT two things may need fixing first: (1) the extractor must actually *extract*
+  the class-internal constant as a node (the dispatch at the `variableTypes` branch skips
+  class-internal assignments — Ruby needed an exception for `constant`-LHS assignments); (2) the
+  reader-scan must match however the grammar represents a constant *reference* (Ruby uses
+  `constant` nodes, not `identifier`). See the Ruby block in the design doc.
+- **Class-scope precision** uses a **file-wide** target map (one target per name per file), NOT
+  strict same-class matching — because lexical-scope languages (Ruby) let a nested class read an
+  enclosing class's constant, and strict matching would drop those valid reads. The only real FP
+  is the same constant name in *sibling* classes in one file (~1.7% of Ruby targets on rails);
+  valid code rarely hits it (a bare sibling-class constant is a NameError in Ruby).
+- **Java/C#/Kotlin/Swift class-scope constants are DONE.** The gate now accepts `file:`/`class:`/
+  `module:`/**`struct:`/`enum:`** parents — the `struct:`/`enum:` widening was added for Swift, which
+  namespaces shared constants in `enum`/`struct` (`enum Constants { static let X }`). **Lesson for the
+  next class-scope language:** check the *parent kind* of a sample const (`select … substr(id…)`) — if
+  it's `struct:`/`enum:`/`interface:` and the gate doesn't list it, widen the gate (one line) or the
+  feature silently emits nothing despite the nodes existing.
+- **Confirm the reader-scan matches the language's constant *reference* node type (the PHP lesson).**
+  The reader-scan in `flushValueRefs` matches `identifier` / `constant` / `name` / `simple_identifier`. If the new language
+  represents a constant *read* as some other node type, the scan finds nothing and **no edges form**
+  even with targets correctly registered. PHP refs a const as a **`name`** node (bare `X`, and the
+  const half of `self::X` / `Foo::X`), which the scan missed until `name` was added. Dump a sample's
+  reader body (`scripts/agent-eval` or a quick `getParser` walk) and check the node type of a
+  constant reference *before* sweeping — a zero-edge sweep usually means this, not a target-gate bug.
+
+### B. Confirm the declarator node type (for the shadow prune)
+
+The shadow prune (in `flushValueRefs`) counts declarator names via a `switch (n.type)` over
+declarator node types — a file only has its own grammar's nodes, so it's safe to list all
+languages' types in one switch. **Add the new grammar's declarator types there**, with the
+right way to pull the bound name(s). **Verify against the actual grammar** (don't trust this
+table — confirm by parsing a sample). **This step is load-bearing:** if you skip it, the prune
+silently does nothing for the new language and intra-file shadowing produces false positives
+(this is exactly what happened on the first Go pass — see §5-Go below).
+
+| Language | declarator node(s) | name extraction | status |
+|---|---|---|---|
+| TS/JS/tsx | `variable_declarator` | `namedChild(0)` | done |
+| Go | `const_spec`, `var_spec`, `short_var_declaration` | spec → `namedChild(0)`; short-var → identifiers in the `left` field | **done** |
+| Python | `assignment` | `left` field: identifier, or iterate a `pattern_list`/`tuple_pattern` | **done** |
+| Rust | `const_item`, `static_item`, `let_declaration` | const/static → `name` field; let → `pattern` field | **done** |
+| Ruby | `assignment` with a `constant`-node LHS (`CONST = …`) | the LHS `constant` node — already in the switch; Ruby can't local-shadow a constant, so the prune is effectively a no-op for it | **done** (class-scope) |
+| C | `init_declarator` in a file-scope `declaration` | `cDeclaratorIdentifier` walks the `declarator` chain (init → pointer/array → identifier) | **done** |
+| C++ | **attempted & reverted** — parse fidelity (see the C++ note in §2b) | — | reverted |
+| Java | `variable_declarator` (field AND method-local) | `namedChild(0)` = name identifier — **already the TS/JS case**, no new wiring | **done** |
+| C# | `variable_declarator` (field AND method-local) | same as Java — already in the switch | **done** |
+| PHP | **none** | a `$var` local (`variable_name`) is a different namespace from a bare constant — a local can never shadow a constant, so the prune is a no-op and needs no PHP declarator | **done** (n/a) |
+| Scala | `val_definition`, `var_definition` | `pattern` field (identifier) — catches an object/top-level val shadowed by a method-local `val` | **done** |
+| Kotlin | `property_declaration` | `variable_declaration → simple_identifier` (and `bump` accepts `simple_identifier`) — catches an object/companion const shadowed by a method-local `val` | **done** |
+| Swift | `property_declaration` | `<name> pattern → simple_identifier` (`firstSimpleIdentifier`) — the prune case resolves both Kotlin and Swift shapes; catches a static const shadowed by a method-local `let` | **done** |
+| Dart | `static_final_declaration` (target) + `initialized_identifier` (field/`var`) + `initialized_variable_definition` (local) | each has a direct `identifier` child — catches a top-level/static const shadowed by a method-local `const` | **done** |
+| Pascal | `declConst` (unit/class const = the target) + `declVar` (a local `var`) | `<name>` field — catches a unit `const X` shadowed by a function-local `const X` | **done** |
+
+**The prune rule is `declarators > file-scope-node-count`, NOT `> 1`.** A name can be bound
+twice *at file scope* legitimately — a **conditional module def** (`try: X = a; except: X = b`,
+or `if cond: X = a else: X = b`). Those make N file-scope nodes AND N declarators, so they're
+kept; a real local shadow makes declarators exceed file-scope nodes. Python forced this
+refinement (try/except const defs are everywhere); it's strictly more correct for all
+languages. `fileScopeValueCounts` (incremented in `captureValueRefScope`) tracks the file-scope
+node count per name. Also: same-name value-ref edges are suppressed (`refName !== scope.name`),
+since the two halves of a conditional def would otherwise cross-reference.
+
+**Go was the worked example of "step B matters":** the first pass added `go` to
+`VALUE_REF_LANGS` only, and a synthetic probe immediately showed a false positive —
+`func withShadow() { TimeoutSeconds := 5; return TimeoutSeconds }` got edged to the package
+`const TimeoutSeconds`, because the prune scanned `variable_declarator` (which Go doesn't
+have). Fix: add Go's `const_spec`/`var_spec`/`short_var_declaration` to the switch. Note the
+**precision-first tradeoff** this inherits from TS/JS — a shadowed target is dropped for the
+*whole file*, so a legit reader elsewhere in that file loses its edge too. On the Go sweep
+(gin/hugo/prometheus) this over-pruning was negligible (guard invariant clean, no LEAKs), so
+it wasn't worth per-reader analysis — but re-check it per language.
+
+### C. Confirm what kind the extractor assigns
+
+`captureValueRefScope` keys off `kind ∈ {constant, variable}` for targets. Index a sample file
+and check `select kind,name from nodes where file_path like '%sample%'` — confirm module-level
+constants come out as `constant`/`variable` (not `field`, `property`, `import`, etc.). If they
+come out as something else, adjust the target gate.
+
+### D. Wire + sweep
+
+1. Add the language string to `VALUE_REF_LANGS`.
+2. `npm run build`.
+3. Run §4.1 probe on **small / medium / large** public OSS repos (≥3 sizes). Prefer repos
+   with real config/constant/lookup-table modules (where the feature shines).
+4. Run §4.2 FP hunts on each. Fix FP clusters (extend a guard); record singletons.
+5. Run §4.3 impact delta on a few targets.
+6. Add a **matrix row** to `value-reference-edges.md` (per language) and a **test** to
+   `__tests__/value-reference-edges.test.ts` (positive read + a shadow/negative case).
+7. `npx vitest run __tests__/value-reference-edges.test.ts` and the full suite.
+
+**Pass bar:** node count identical on/off at every size; precision samples clean (FP clusters
+fixed); impact delta shows the blind→real radius win; full test suite green.
+
+---
+
+## 6. Git / PR workflow (how the prior ones were done)
+
+- Branch off `main` (e.g. `feat/value-refs-<lang>`). This validation work has lived on
+  `feat/value-refs-validation`; a new language can extend it or take its own branch.
+- A pure-validation change is **docs (+ a test)**; a precision fix is a focused **code** PR
+  (like #895). Keep code fixes separate from the doc/matrix update when practical.
+- Commit-message trailer: `Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>`.
+- PR body trailer: `🤖 Generated with [Claude Code](https://claude.com/claude-code)`.
+- Merge is the **maintainer's call** — don't self-merge unless told. Branch protection needs
+  `gh pr merge --squash --admin` when authorised (memory: `gh-merge-needs-admin`).
+- CHANGELOG: user-facing entries under `## [Unreleased]`; don't pre-create a version block.
+
+---
+
+## 7. Traps already hit (save yourself the time)
+
+- **Probe false-match:** `metadata LIKE '%valueRef%'` matches *filenames* in other edges'
+  metadata (e.g. an `interface-impl` `calls` edge whose `registeredAt` is
+  `…/textModelValueReference.ts`). **Always** filter `kind='references' AND metadata LIKE
+  '%"valueRef":true%'`. This created a phantom "method target" FP on vscode that was pure
+  query noise.
+- **`searchNodes` returns `SearchResult[]`** (`.node` wraps the `Node`) — in tests use
+  `.map(r => r.node)`. `getImpactRadius().nodes` is a **`Map`** — iterate `.values()`.
+- **`CodeGraph.initSync(dir, opts)` ignores `opts`** — it takes only the path; the default
+  config indexes `.ts`/`.tsx`/`.js`. Don't rely on a passed `include`.
+- **Node count must be identical on/off.** If it isn't, value-refs is (wrongly) creating nodes
+  — investigate before anything else.
+- **Big repos:** indexing vscode (11.5k files) took ~2m and a ~1GB DB per arm; clean up
+  `/tmp` after (each on/off pair is hundreds of MB to >2GB).
+- **require-bindings (CommonJS) are not FPs** — see §3. Don't "fix" them.
+- **Don't over-engineer a guard for a gap that doesn't manifest** (e.g. param-only shadow):
+  evidence-driven only. The maintainer steered toward minimal, surgical fixes.
+- **C macro-prefixed-prototype misparse (the C FP cluster):** an unknown leading macro
+  (`CURL_EXTERN`, `XXH_PUBLIC_API`) makes tree-sitter-c misparse a prototype `MACRO RetType
+  fn(args);` as a *declaration* whose declared "variable" is the bare return-type identifier
+  (`XXH_errorcode`), splitting `fn(args)` into a bogus expression. It mints one spurious type-named
+  global per prototype — then edged by every function of that type (redis `XXH_errorcode` 1→18).
+  These misparses *always* produce a **bare `identifier`** declarator (checked across
+  pointer/array/sized-return variants); real consts/tables always have an `init_declarator` and real
+  pointer/array globals their own declarator. Fix = **skip bare-`identifier` declarators** in the C
+  branch. Skipping them also removes the spurious "extra" file-scope variable nodes, so the node
+  count drops vs an early pass — both arms still match, but don't be surprised that the post-fix
+  count is *lower*.
+- **"Easy path" ≠ "nodes already exist."** The §2b table classifies by *scope*; it does not promise
+  the language's consts are extracted. C sat in the easy column yet emitted zero file-scope const
+  nodes. Run §5 step C (`select kind,name from nodes where file_path like '%sample%'`) on a sample
+  *first* — if the consts aren't there, you're doing the Ruby treatment, not the easy path.
+- **Class consts may extract as `field` kind, not `constant` (Java/C#).** Step C must check the
+  *kind*, not just that a node exists: Java `static final` and C# `const`/`static readonly` came out
+  as `field`, which the value-ref target gate (`constant`/`variable` only) silently rejects — so the
+  feature emitted nothing despite the nodes being present. Fix = an `isConst` predicate on the
+  extractor (gated on the const modifiers) + a kind switch in `extractField` (scoped per-language so
+  other languages' fields stay `field`). Don't widen the *gate* to accept `field` — that would pull
+  in every mutable instance field as a target. And only the const *subset* converts: a Java instance
+  `final` or C# instance `readonly` is per-object state, must stay `field`.
+- **A zero-edge sweep with correctly-registered targets = check the reader-scan node type (the PHP trap).**
+  Targets can register perfectly (right kind, right scope) and *still* produce zero edges if the
+  reader-scan doesn't recognise how the language writes a constant *read*. PHP refs a const as a
+  **`name`** node, not `identifier`/`constant`, so the scan saw nothing until `name` was added to the
+  match. Before assuming a target-gate bug on a sparse/empty sweep, dump a reader body and check the
+  node type of a known constant reference. (Adding a ref node type to the scan is safe across
+  languages — `flushValueRefs` only runs for the value-ref set, and a file holds only its own
+  grammar's nodes; `name` is PHP-only among the current set.)
+- **Same-file-only means cross-file-heavy languages yield less — that's correct, not a miss.** PHP
+  reads constants across files far more than within one (`Logger::DEBUG` everywhere), so laravel
+  (2,956 files) gave only 86 edges vs 2,255 for Ruby's rails. Don't chase it: cross-file value consumers
+  are out of scope for *every* language (would need import/scope resolution). Report the lower yield
+  honestly in the matrix rather than treating it as a bug to fix.
+- **Some extractors emit parameters/fields as `variable` at the wrong scope — restrict to `constant`
+  (the Pascal trap).** Pascal's extractor emits function `const`/`var` parameters and class fields as
+  `variable` parented to the enclosing unit/class, so they pass the target gate and collapse to noisy
+  file-wide targets (`Dest`, `aItem` read "everywhere"). The genuine shared values were all `constant`
+  (`declConst`), so the fix is a one-line per-language restriction in `captureValueRefScope`: Pascal
+  targets `constant` only. Before trusting a new language's `variable` targets, sample them — if they're
+  parameters or instance fields rather than module/global state, restrict to `constant`. (A residual
+  tail can still leak: tree-sitter-pascal context-dependently misparses a `const` param in a complex
+  Delphi signature as a `declConst` — a small parse-fidelity FP, accepted as a documented caveat.)
+- **A zero-edge sweep with targets present can be the READER side, not just the reader-scan node type
+  (the Dart trap).** Targets extracted fine, reader scopes registered, reader-scan node type correct —
+  and still zero edges, because Dart attaches a method **body as a next *sibling*** of the signature
+  node (which is what gets stored as the reader scope), so the scan walked only the signature subtree.
+  If a language's function/method body isn't a descendant of the node you register as the reader scope,
+  the scan won't see the reads — pull in the sibling/linked body. Check this when edges are zero but
+  both the targets and the reader nodes look right.
+
+---
+
+## 8. Reference
+
+- Code: `src/extraction/tree-sitter.ts` (`VALUE_REF_LANGS`, `captureValueRefScope`,
+  `flushValueRefs`), `src/extraction/generated-detection.ts` (`isGeneratedFile`).
+- Design + matrix: `docs/design/value-reference-edges.md`.
+- Tests: `__tests__/value-reference-edges.test.ts`.
+- PRs: **#895** (default-on + shadow prune), **#897** (TS/JS/tsx validation).
+- Memories: `value-reference-edges-default-on`, `agent-eval-targets-public-oss-only`,
+  `agent-eval-nested-attach`, `gh-merge-needs-admin`, `impact-coverage-findings`.

+ 469 - 0
docs/design/value-reference-edges.md

@@ -0,0 +1,469 @@
+# Design + status: same-file value-reference edges
+
+**Status:** SHIPPED (default-on for TS/JS/tsx + Go + Python + Rust + Ruby + C + Java + C# + PHP + Scala + Kotlin + Swift + Dart + Pascal; `CODEGRAPH_VALUE_REFS=0` disables). The
+emitter lives in `TreeSitterExtractor.flushValueRefs` (`src/extraction/tree-sitter.ts`).
+**Motivation:** close the impact-analysis hole for *value consumers*. Static
+extraction edges calls, imports, and inheritance, but never edges a constant to the
+symbols that read it — so changing a config object / lookup table / shared constant
+looked like "nothing depends on this." This is the "change this table, break its
+readers" class of change (the ReScript-PR false positive that motivated the work).
+
+---
+
+## TL;DR for a new session
+
+We emit a `references` edge (`metadata: { valueRef: true }`) from a reader symbol to
+the **file/package-scope `const`/`var` it reads**, same-file only, for TS/JS/tsx + Go + Python + Rust + Ruby + C + Java + C# + PHP + Scala + Kotlin + Swift + Dart + Pascal. Those edges
+flow straight into `getImpactRadius` / `codegraph impact` and the impact trail in
+`codegraph_explore` / `codegraph_node` — no agent-behaviour change required.
+
+The win is **impact-radius correctness**, not agent read-reduction (see "Agent A/B").
+
+## Edge semantics
+
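+- **Target:** a file-scope `const`/`var` — or, for languages that keep constants inside a
+  class/module (Ruby, Java, C#, …), a class-/module-scope constant — whose name is
+  "distinctive" (≥3 chars and contains an uppercase letter or `_`). The name gate dodges the
+  local-shadowing precision trap that single-letter / all-lowercase names invite.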
+- **Reader (source):** any `function` / `method` / `const` / `var` symbol whose body
+  references the target name.
+- **Same-file only** — resolution is unambiguous without import/scope analysis.
+- **Deduped** per `(reader, target)`. **Additive** — adds edges, never nodes.
+
+## Precision guards (in emission order)
+
+1. **`isGeneratedFile(path)`** — skip suffix-recognised generated files (`.pb.ts`,
+   `.min.js`, …). Path-only; it cannot catch content-minified bundles.
+2. **Shadow prune** — drop a target when its **declarator count exceeds its file-scope node
+   count**, i.e. it's also bound in an *inner* (local) scope. A bundled/Emscripten `const
+   Module` re-declared as an inner `var Module`, a Go package const shadowed by a local `:=`,
+   or a Python module const shadowed by a local `=` all resolve to the inner binding for nested
+   readers — a file-scope edge would be a false positive. Inner re-bindings aren't graph nodes,
+   so declarators are counted at the syntax level (per-grammar node types: `variable_declarator`
+   for TS/JS, `const_spec`/`var_spec`/`short_var_declaration` for Go, `assignment` for Python,
+   `const_item`/`static_item`/`let_declaration` for Rust).
+   Comparing against file-scope node count (not a flat ">1") keeps **conditional module defs**
+   (`try: X=…; except: X=…`), which legitimately bind a name twice at file scope. The shadow
+   prune also catches the content-minified bundles that guard #1 misses.
+3. **Distinctive-name + same-file** as above.
+
+## Validation matrix — TS / JS / Go / Python / Rust / Ruby / C / Java / C# / PHP / Scala / Kotlin / Swift / Dart / Pascal
+
+Method per repo: index the same tree twice (value-refs on vs `CODEGRAPH_VALUE_REFS=0`),
+diff node/edge counts, spot-check precision, and measure `codegraph impact` on a few
+file-scope consts. Node count must be **identical** on/off (edges-only feature).
+
+**TypeScript**
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| sindresorhus/ky | small | 54 | 562 (stable) | +29 (0.8%) | all sampled TP | — |
+| excalidraw/excalidraw | medium | 645 | 10,301 (stable) | +717 (1.6%) | TP after shadow prune (#895 removed 23 woff2-bundle FPs) | `tablerIconProps` 1→**170** |
+| microsoft/vscode | large | 11,548 | 333,999 (stable) | +10,605 (0.69%) | all sampled TP; no param-shadow / bundle FPs in top 200 | `LayoutStateKeys` 1→**85**, `CORE_WEIGHT` 1→52 |
+
+**JavaScript** (same extractor; CommonJS, `var`, IIFE/UMD)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| expressjs/express | small | 147 | 1,082 (stable) | +27 (0.75%) | all sampled TP | — |
+| eslint/eslint | medium | 1,420 | 7,167 (stable) | +1,192 (4.2%) | all sampled TP; guard holds; no minified-file FPs | `internalSlotsMap` 1→**32**, `INDEX_MAP` 1→27 |
+| webpack/webpack | large | 9,371 | 28,922 (stable) | +3,521 (4.8%) | all sampled TP; guard holds; no minified-file FPs | `LogType` 1→**89**, `LOG_SYMBOL` 1→90, `UsageState` 2→52 |
+
+**Go** (package-level `const`/`var`; required extending the shadow prune — see below)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| gin-gonic/gin | small | 110 | 2,599 (stable) | +166 (1.9%) | all sampled TP; guard holds | `abortIndex` 1→**24**, `jsonContentType` 1→8 |
+| gohugoio/hugo | medium | 952 | 19,160 (stable) | +1,616 (2.5%) | all sampled TP; guard holds | `filepathSeparator` 2→**26** |
+| prometheus/prometheus | large | 1,329 | 23,322 (stable) | +3,466 (3.3%) | all sampled TP; guard holds | `rdsLabelInstance` 1→**82**, `ec2Label` 1→24 |
+| kubernetes/kubernetes | very large | 19,160 | 251,086 (stable) | +20,574 (1.9%) | all sampled TP; guard holds on 250 targets | `KubeletSubsystem` 3→**138**, `LEVEL_0` 1→102 |
+
+**Python** (module-level `NAME = …`; required extending the prune *and* refining its rule — see below)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| psf/requests | small | 49 | 1,299 (stable) | +85 (2.9%) | all sampled TP; guard holds | `ITER_CHUNK_SIZE` 1→4, `DEFAULT_POOLBLOCK` 1→4 |
+| sqlalchemy/sqlalchemy | medium | 679 | 59,963 (stable) | +1,929 (0.8%) | all sampled TP; guard holds | `COMPARE_FAILED` 1→**26**, `DB_LINK_PLACEHOLDER` 1→19 |
+| django/django | large | 3,005 | 61,748 (stable) | +1,328 (0.7%) | all sampled TP; guard holds | `_trans` 1→**138**, `SEARCH_VAR` 4→8 |
+
+**Rust** (module-level `const`/`static`; declarators added, no rule change needed)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| BurntSushi/ripgrep | small | 107 | 3,731 (stable) | +144 (0.9%) | all sampled TP; guard holds | `SHERLOCK` 7→**113** |
+| tokio-rs/tokio | medium | 795 | 13,281 (stable) | +476 (1.1%) | all sampled TP; `#[cfg]`-conditional consts kept | `PERMIT_SHIFT` 1→**97**, `LOCAL_QUEUE_CAPACITY` 2→46 |
+| rust-lang/rust-analyzer | large | 1,530 | 38,780 (stable) | +475 (0.25%) | all sampled TP; 0 real shadow leaks | `INLINE_CAP` 2→**183**, `SPAN_PARTS_BIT` 2→18 |
+
+**Ruby** (`CONST = …`, almost always **inside a class/module** — needed the class-scope extension)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| sinatra/sinatra | small | 96 | 1,800 (stable) | +73 (2.1%) | ~100% TP (flags are valid nested reads) | `HEADER_PARAM` 1→**5** |
+| jekyll/jekyll | medium | 218 | 1,906 (stable) | +100 (2.4%) | ~100% TP | `DEFAULT_PRIORITY` 1→3, `LOG_LEVELS` 4→5 |
+| rails/rails | large | 1,452 | 61,911 (stable) | +2,255 (1.2%) | ~98% TP (same-file ambiguity 21/1208 targets) | `Post` (Struct const) 75 readers |
+
+**C** (file-scope `static const` scalars + pointer/array lookup tables + mutable globals; required
+extracting the nodes first — see below)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| redis/hiredis | small | 52 | 1,161 (stable) | +29 (2.5%) | all sampled TP; guard holds | `hiredisAllocFns` 1→**71** |
+| curl/curl | large | 994 | 16,124 (stable) | +597 (3.7%) | all sampled TP; guard holds; no minified FPs | `Curl_ssl` 3→**57** |
+| redis/redis | medium | 782 | 19,446 (stable) | +1,634 (8.4%) | all sampled TP after the macro-misparse fix; guard holds | `asmManager` 2→**97**, `keyMetaClass` 1→36, `XXH3_kSecret` 1→27, `helpEntries` 1→13 |
+
+**Java** (class-scope `static final` constants; required emitting them as `constant` kind — see below)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| google/gson | small | 262 | 8,563 (stable) | +387 | all sampled TP; guard holds | `PEEKED_NONE` 1→**31** |
+| apache/commons-lang | medium | 623 | 19,976 (stable) | +2,087 | all sampled TP; guard holds; no minified FPs | `INDEX_NOT_FOUND` 4→**165**, `EMPTY` 5→161 |
+| google/guava | large | 3,227 | 130,945 (stable) | +6,354 | all sampled TP; guard holds; no minified FPs | `APPLICATION_TYPE` 2→**126**, `ABSENT` 4→66 |
+
+**C#** (class-scope `const` / `static readonly`; same `field`→`constant` change as Java)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| AutoMapper/AutoMapper | small | 511 | 19,254 (stable) | +133 | all sampled TP; guard holds | `ContextParameter` 1→**17**, `InstanceFlags` 1→14 |
+| JamesNK/Newtonsoft.Json | medium | 945 | 20,208 (stable) | +344 | all sampled TP; guard holds | `DefaultFlags` 1→**37**, `JsonNamespaceUri` 1→15 |
+| dotnet/efcore | large | 5,731 | 140,847 (stable) | +3,720 | all sampled TP; guard holds; no minified FPs | `_resourceManager` 22→**1664**, `Prefix` 40→237, `Guid77` 2→191 |
+
+**PHP** (top-level `const` + class `const`, both already `constant`; needed only a reader-scan tweak — see below)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| guzzle/guzzle | small | 81 | 1,655 (stable) | +5 (sparse — see note) | all sampled TP; no collisions | `CONNECTION_ERRORS` 1→3 |
+| Seldaek/monolog | medium | 217 | 3,047 (stable) | +79 | all sampled TP; no class/const collisions | `DEFAULT_JSON_FLAGS` 1→**18**, `RFC_5424_LEVELS` 1→17 |
+| laravel/framework | large | 2,956 | 57,519 (stable) | +86 | all sampled TP; no minified/collision FPs | `INVISIBLE_CHARACTERS` 1→**93**, `SESSION_ID_LENGTH` 1→9 |
+
+**Scala** (top-level `val` + `object` val — re-kinded from `field`; `class` instance vals stay `field`)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| com-lihaoyi/upickle | small | 145 | 3,052 (stable) | +82 | all sampled TP; no class/method collisions | `IntegralPattern` 1→**9** |
+| typelevel/cats | medium | 835 | 15,774 (stable) | +89 | sampled TP; flagged val/def name-collisions were real object vals read by siblings | `maxArity` 3→**17**, `fusionMaxStackDepth` 1→13, `minIntValue` 1→7 |
+| apache/pekko | large | 2,720 | 135,041 (stable) | +8,453 (2,065 Scala) | Scala object vals clean; the bulk are valid Java `PARSER`/`DEFAULT_INSTANCE` from generated protobuf `.java` | `ErrorLevel` 5→**33**, `WarningLevel` 5→29 |
+
+**Kotlin** (top-level / `object` / `companion object` `val` → `constant`; `class` instance vals stay `field`)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| square/okio | small | 307 | 8,540 (stable) | +157 | all sampled TP; 0 collisions | `STATE_IN_QUEUE` 1→**32**, `HMAC_KEY` 1→9 |
+| Kotlin/kotlinx.coroutines | medium | 1,039 | 17,058 (stable) | +210 | all sampled TP; 1 cross-file collision | `BLOCKING_SHIFT` 1→**24**, `TERMINATED` 2→22 (companion bit-masks) |
+| ktorio/ktor | large | 2,302 | 43,272 (stable) | +849 | object/companion consts (HTTP header names); flagged collisions are real consts; `TYPE` is a sibling-companion ambiguity | `TYPE` 8→**109**, `FailedPath` 1→22 |
+
+**Swift** (top-level `let` + `static let` in `struct`/`enum`/`class` → `constant`; instance `let` stays `field`; computed properties skipped)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| Alamofire/Alamofire | small | 98 | 4,192 (stable) | +108 | all sampled TP; 0 collisions; computed properties skipped | `defaultRetryLimit` 1→3, `defaultWait` 1→4 |
+| apple/swift-argument-parser | medium | 165 | 4,435 (stable) | +36 | all sampled TP; 1 sibling-type collision (`usageString`) | `usageString` 8→**18**, `labelColumnWidth` 1→2 |
+| apple/swift-nio | large | 554 | 20,136 (stable) | +589 | all sampled TP; 0 collisions; `eventLoop` (static let) verified TP | `CONNECT_DELAYER` 1→**15**, `SINGLE_IPv4_RESULT` 1→12 |
+
+**Dart** (top-level `const`/`final` + class `static const`/`static final` = the `static_final_declaration` node → `constant`)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| dart-lang/http | small | 324 | 4,860 (stable) | +668 | real source TP; numbers skewed by a JNIGEN `_bindings.dart` (sibling-class collapse) | `Finishing` 1→**10**, `CONNECTION_PREFACE` 5→7 |
+| flame-engine/flame | medium | 1,655 | 19,608 (stable) | +465 | all sampled TP; bounded const-vs-getter collisions | `cardWidth` 4→**15**, `tileSize` 3→12 |
+| flutter/packages | large | 3,452 | 116,075 (stable) | +10,015 | real Flutter consts; some `.gen.dart` (pigeon) generated noise | `iconFont` 1→**1790**, `_channel` 6→72, `kMaxId` 1→23 |
+
+**Pascal / Delphi** (unit/class `const` → `constant`; **`constant`-only** targets — the extractor emits params/fields as `variable`)
+
+| Repo | size | files | nodes (on=off) | +value-ref edges | precision | `impact` off→on example |
+|---|---|---|---|---|---|---|
+| HashLoad/horse | small | 74 | 2,464 (stable) | +4 (sparse — cross-unit reads) | all sampled TP | `LOG_NFACILITIES` (Syslog const) |
+| synopse/mORMot2 | medium | 539 | 66,760 (stable) | +2,240 | precision sample 100% TP (font/crypto/DB consts); a few `const`-param misparse FPs in complex Delphi sigs | `LIB_CRYPTO` 1→**358**, `DEFAULT_ECCROUNDS` 1→31 |
+| castle-engine | large | 2,430 | 93,692 (stable) | +6,983 | top targets all real FFI binding consts; 0 collisions | `LazGio2_library` 2→**1880**, `LIB_CAIRO` 1→223 |
+
+Across S/M/L in all fifteen languages: node count never moved, the precision guards held, and
+the OFF value in the `impact` column (the small number before the arrow) is the bug — a const
+that 80–140 symbols read reports "1 affected" without value-refs.
+
+**Go required a code change** (unlike JS/tsx, which the existing guards covered unchanged).
+Go puts its constants at package = file scope (good — the target gate fits), but its
+declarators are `const_spec`/`var_spec`/`short_var_declaration`, not `variable_declarator`, so
+the shadow prune was a no-op for Go and a package `const Timeout` shadowed by a local
+`Timeout := …` produced a false positive. Extending the prune's declarator switch to Go's node
+types fixed it (one synthetic repro, then clean across gin/hugo/prometheus). This is the
+template for the next language: **the shadow prune is per-grammar and must be wired per
+language** (see the playbook).
+
+**Python forced a refinement of the prune *rule* — a general improvement.** Python's
+declarator is `assignment` (added to the switch). But Python also **conditionally defines
+module constants** (`try: HAS_SSL = True; except: HAS_SSL = False`) — a very common idiom that
+binds the name twice *at module scope*. The old "bound more than once → drop" rule over-pruned
+these (dropping a real const and its readers). The fix distinguishes a conditional module def
+from a real shadow by comparing declarator count against the number of **file-scope nodes** the
+name has: a conditional def makes them equal (both bindings are file-scope), a local shadow
+makes declarators exceed file-scope nodes (the excess is the local). This is strictly more
+correct for *all* languages. (It also made the two halves of a conditional def cross-reference
+via their own names, so same-name value-ref edges are now suppressed.)
+
+**Rust needed only declarators — the rule was already right.** Rust's are `const_item` /
+`static_item` (module consts) and `let_declaration` (the local that shadows). Adding them to
+the switch fixed the expected shadow FP (a `const TIMEOUT` shadowed by a local `let TIMEOUT`).
+Rust also has the conditional-def pattern — `#[cfg(unix)] const SEP = …; #[cfg(windows)] const
+SEP = …` — and the Python-era file-scope-count rule already keeps those correctly (validated on
+tokio's `io/interest.rs` cfg-gated flags). One nice property fell out: consts written inside a
+config macro (`cfg_aio! { … }`) live in an unparsed token tree, so the prune's syntax walk
+doesn't even see them.
+
+**Ruby is the class-scope case — and required three changes.** Ruby keeps almost all constants
+*inside* a class/module (jekyll's `lib/`: 0 top-level vs 58 class-internal), so the original
+file-scope-only target gate covered ~nothing. Three Ruby-specific fixes: (1) the extractor now
+creates nodes for constant assignments (`CONST = …` has a `constant`-typed LHS, not
+`identifier`, so they were never extracted at all) — including class-internal ones; (2) the
+value-ref target gate accepts `class:`/`module:` parents, not just `file:`; (3) the reader-scan
+matches `constant` nodes, since in Ruby both a constant's definition and its references are
+`constant`-typed. **Effectively Ruby-only:** Rust impl consts are parented to `file:` already
+(so the gate change doesn't touch them — ripgrep stayed at 144 edges), and TS/Python class
+members aren't `constant`/`variable` kind.
+
+The interesting precision question — *which* class does a class-scope target belong to — turns
+out to favor a **file-wide** target map (a name maps to one target per file), because Ruby's
+constant lookup is **lexical + ancestor**: a method in a nested class legitimately reads an
+enclosing class's constant (verified on jekyll's `ERBRenderer→ThemeBuilder::SCAFFOLD_DIRECTORIES`
+and sinatra's `AcceptEntry→Request::HEADER_PARAM`). Strict same-class matching would wrongly drop
+those. The only real false positive is the same constant name defined in *sibling* (un-nested)
+classes in one file — 21 of 1,208 targets (1.7%) on rails, and most of those resolve fine too;
+referencing a sibling class's bare constant is a NameError in real Ruby, so valid code rarely
+hits it. Net precision ~98–100%.
+
+**C was NOT the "easy path" the language tracker first assumed — it needed the extractor to emit
+the nodes first.** C keeps shareable values at file scope (`static const` scalars, and very
+commonly pointer/array **lookup tables** + mutable global state), which fits the file-scope target
+gate. But unlike Go/Rust (whose const nodes already existed), C's file-scope `const`/`var` were
+**never extracted as nodes at all**: a C `declaration` nests its name inside an `init_declarator`
+(through `pointer_declarator`/`array_declarator`), and the generic variable-extraction fallback
+only finds a *direct* `identifier` child — so it produced nothing. Three changes (the same shape as
+Ruby's): (1) a C branch in `extractVariable` that resolves the name through the declarator chain and
+emits file-scope declarations as `constant`/`variable` (skipping function-body locals via an
+ancestor check, and `function_declarator` prototypes); (2) an `isConst` on the C extractor (a
+`const` `type_qualifier` → `constant` kind); (3) the shadow prune's declarator switch extended with
+`init_declarator`. Scoped to **C only** — C++ stays on the generic fallback (its class-scope members
+are the harder bucket).
+
+The one false-positive cluster the sweep surfaced was a **macro-prefixed-prototype misparse**, and
+the fix is the load-bearing C detail: an unknown leading macro (`CURL_EXTERN`, `XXH_PUBLIC_API`)
+makes tree-sitter-c misparse a prototype `MACRO RetType fn(args);` as a declaration whose declared
+"variable" is the **bare return-type identifier** (`XXH_errorcode`/`CURLcode`), splitting `fn(args)`
+off as a bogus expression — minting one spurious type-named global per prototype, then edged by
+every function returning that type (redis's `XXH_errorcode` 1→18 before the fix). These misparses
+*always* yield a **bare `identifier`** declarator (verified across pointer/array/sized return
+variants); real consts/tables always carry an initializer (`init_declarator`) and real
+pointer/array globals carry their own declarator. So the C branch **skips bare-`identifier`
+declarators entirely** — killing the whole FP class at the cost of only uninitialized scalar globals
+(`static int g;`), which are rare and low-value. After the fix: every sampled edge on
+hiredis/redis/curl was a true positive, the guard-invariant leak check found 0 shadows across all
+three, and `impact` deltas confirm the blind→real radius (`asmManager` 2→97, `Curl_ssl` 3→57,
+`hiredisAllocFns` 1→71).
+
+**Java + C# were the cleanest class-scope languages — one kind switch, no new guards.** Both keep
+constants *inside a class* (Java `static final` fields; C# `const` / `static readonly`), so unlike
+C the nodes already existed — but as **`field`** kind, which the value-ref gate (`constant`/
+`variable` only) rejects. The whole change was emitting the constant *subset* as `constant`: an
+`isConst` predicate on each extractor (Java = a `static final` field; C# = a `const`, or a `static
+readonly`) plus a kind switch in `extractField`. Everything else was already in place — the
+class-scope target gate (from Ruby), the `identifier` reader-scan, and crucially the shadow prune:
+a method-local that shadows a class const is a `variable_declarator` in both grammars, *already* in
+the prune switch, so a class const shadowed by a local is dropped with no new wiring (validated by
+the Java/C# shadow tests). Instance fields stay `field` — a Java instance `final` or a C# instance
+`readonly` is per-object state, not a shared constant, so it's never a target. The distinctive-name
+gate fits both conventions cleanly (Java `UPPER_SNAKE`, C# `PascalCase`), so no FP class emerged:
+across S/M/L (gson/commons-lang/guava, automapper/newtonsoft/efcore) every sampled edge was a true
+positive, 0 shadow leaks, no minified-file FPs, node count identical on/off. The `impact` wins are
+the headline — Java's canonical `public static final` constants (`INDEX_NOT_FOUND` 4→165, `EMPTY`
+5→161) and C#'s `const`/`static readonly` (`Prefix` 40→237, a generated `_resourceManager` 22→1664)
+all went from a badly undercounted radius to their real one. The known sibling-class limitation (the
+same const name in two classes in one file resolves to the file-wide target) is shared with Ruby and
+stayed negligible.
+
+**PHP was a near-pure "easy path" — one reader-scan line, no extractor change, no prune wiring.**
+PHP already extracts both top-level `const X = …` and class `const X = …` as `constant` kind (a
+dedicated `const_declaration` handler), inside the right scope (`file:` / `class:`, both gated). The
+*only* change was the reader-scan: PHP represents a constant *reference* — bare `X`, or the const
+half of `self::X` / `Foo::X` / `static::X` — as a **`name`** node, which the scan (matching
+`identifier` / `constant`) missed, so it found nothing until `name` was added. That's safe across
+languages: `flushValueRefs` only runs for the value-ref set, and `name` is PHP-only among them. **No
+shadow prune was needed at all** — a PHP local is a `$var` (`variable_name`), a different namespace
+from a bare constant, so a local can *never* shadow a constant; there is nothing to prune (the
+cleanest case yet). Precision was excellent: UPPER_SNAKE constants fit the distinctive-name gate, and
+a dedicated check for a target whose name collides with a same-file *class* (PHP's one realistic FP —
+`name` nodes also name classes in `new Foo()` / `Foo::`) found **zero** collisions across
+guzzle/monolog/laravel; every sampled edge was a true positive, node count identical on/off.
+
+**The honest caveat: PHP is lower-yield than the class-scope languages, by design.** PHP idiom reads
+constants *across* files far more than within one (a `Logger::DEBUG` or a config constant consumed
+everywhere), and value-refs is **same-file only** — so laravel (2,956 files) produced only 86 edges
+vs. Ruby rails's 2,255 (1,452 files). This is not a miss: the cross-file reads are out of scope for
+*every* language (resolution would need import/scope analysis), and PHP simply leans on them more.
+The same-file reads it *does* capture are clean and the transitive impact wins are real
+(`INVISIBLE_CHARACTERS` 1→93 from 3 direct readers). Net: correct and additive, just a smaller
+absolute contribution than Java/C#/Go.
+
+**Scala — the `object` is the constant scope.** Scala has no `static`; the idiom for a shared
+constant is a `val` inside a singleton `object` (`object Config { val Timeout = 30 }`). A top-level
+`val` already extracted as `constant`, but `object` and `class` vals both came out as `field` (the
+gate rejects `field`). The fix is a kind refinement in the Scala `val_definition` handler: walk to
+the enclosing definition and treat an `object_definition` (or top level) val as `constant`/`variable`
+— while a `class`/`trait`/`enum` val stays `field`, because it is per-instance immutable state, the
+exact analogue of the Java instance `final` we also keep as `field`. (`object` and `class` both
+extract as `class` *kind*, so the distinction is the enclosing AST node type, not the node kind.)
+The shadow prune gained `val_definition`/`var_definition` (a method-local `val` can shadow an object
+val); the reader-scan needed nothing, since a Scala val reference is a plain `identifier`. Method-local
+vals are not extracted at all, so they're not a target source. The one **known limitation** is
+Scala's interchangeable `val`/`def` for members: a camelCase val can share a name with a method in the
+same file, and same-file name matching can't distinguish them — but it's bounded (like Ruby's
+sibling-class case), and on the sweep every flagged val/def collision turned out to be a real `object`
+val read by sibling vals (cats' typeclass instances: `val flatMap = monad`, read by
+`invariantSemigroupal`). Validated S/M/L (upickle/cats/pekko): node count identical on/off, top
+targets genuine object vals (`val maxArity = 22`, the `DigitTens` lookup table), impact wins real
+(`maxArity` 3→17). The distinctive-name gate fits Scala's camelCase/PascalCase constants (`maxArity`,
+`IntegralPattern`) via their internal uppercase letter.
+
+**Kotlin combined three already-built techniques.** Kotlin has no `static`: shared constants live at
+top level, in an `object` (singleton), or in a class's `companion object` — all `val`/`const val`. A
+class instance `val` is per-object state. Nothing extracted before because a Kotlin property name
+nests (`property_declaration → variable_declaration → simple_identifier`) and the generic path reads
+only a direct child — the **C** problem. The fix handles `property_declaration` in the Kotlin
+`visitNode` hook (where the existing one already manages `fun interface` misparses): pull the nested
+name, then walk to the enclosing definition to set the kind — `object_declaration`/`companion_object`
+(or top level) → `constant`/`variable` (the **Scala** object-vs-class rule), `class_declaration` →
+`field`, and a property under a `function_body`/`init`/lambda is a local and skipped. The reader-scan
+gained `simple_identifier` (Kotlin's reference node — the **PHP `name`** move; `simple_identifier` is
+Kotlin-only among the value-ref set), and the shadow prune gained `property_declaration` (a method-local
+`val` can shadow an object const). Kotlin's parse fidelity is clean (its one known misparse,
+`fun interface`, is already handled), so unlike C++ no precision tail emerged. It validated as one of
+the *cleanest* languages: companion-object bit-masks and state constants are a heavy, same-file-read
+idiom (coroutines' `BLOCKING_SHIFT` 1→24, `TERMINATED` 2→22 in the scheduler; okio's `STATE_IN_QUEUE`
+1→32; ktor's content-type `TYPE` 8→109). okio had 0 collisions, coroutines 1 (cross-file). The same
+val/def-or-class name-overlap limitation as Scala applies (ktor's HTTP DSL names a header const and a
+class the same), plus the sibling-companion case (several `companion object { const val TYPE }` in one
+file collapse to the file-wide target, like Ruby's sibling-class) — both bounded, and every flagged
+collision investigated was a real object/companion const.
+
+**Swift reused the Kotlin techniques and added two Swift-specific touches.** Swift has no `static`
+keyword for globals; its shared-constant idiom is a top-level `let` or a `static let` inside a type —
+and Swift idiomatically *namespaces* constants in `enum`/`struct` (`enum Constants { static let X }`).
+A property name nests (`property_declaration → <name> pattern → simple_identifier`), the C-style
+problem; the reader-scan already matched `simple_identifier` (added for Kotlin — Swift shares it). The
+kind rule: top-level `let` and `static let` (in any type) → `constant` (`var` → `variable`); an
+*instance* `let`/`var` stays `field` (Swift instance stored properties otherwise aren't own nodes —
+unchanged). The two Swift-specific touches: (1) **the value-ref target gate was widened to `struct:`/
+`enum:` parents**, because Swift namespaces constants in those (every other language's targets sit at
+`file:`/`class:`/`module:`); without it, the heavily-used `enum`/`struct` static consts would all be
+missed. (2) **Computed properties are skipped** — a `var x: Int { … }` has a getter block, no stored
+value, and isn't a constant; the extractor detects the `computed_property` child and emits no node
+(verified: no computed-property leaks across the sweep). The node creation slots into the *existing*
+Swift `property_declaration` handler (which already extracts property-wrapper / type-annotation
+dependencies like `@Published`/`@State`), so that behavior is untouched. Validated S/M/L
+(Alamofire/swift-argument-parser/swift-nio): node count identical on/off, genuine static-let
+constants (`defaultRetryLimit`, swift-nio's `CONNECT_DELAYER`/`SINGLE_IPv4_RESULT` test constants, a
+shared `static let eventLoop` read by 37 methods), computed properties skipped, 0–1 collisions per
+repo (the same sibling-type name-overlap bound as Kotlin/Ruby).
+
+**Dart — the grammar did the scope separation; the catch was a sibling body.** Dart's tree-sitter
+grammar is unusually helpful here: a **`static_final_declaration`** node is *exactly* a top-level or
+class-`static` `const`/`final` — the shared-constant idiom — while instance fields and `var` use
+`initialized_identifier` and method-locals use `initialized_variable_definition`. So a single
+`visitNode` rule (`static_final_declaration` → `constant`, named by its `identifier` child) captures
+all and only the constants, with **no instance/local leaks to guard** and no scope-walk needed (the
+node stack gives `file:` for top-level, `class:` for a static member). The reader-scan was already
+covered (Dart references are plain `identifier`). The non-obvious bug: **Dart attaches a method/function
+`body` as a next *sibling* of the signature node** — and the signature is what gets stored as the
+reader scope — so the scan walked only the signature and produced *zero* edges until it was taught to
+also pull in a `function_body` next-sibling (Dart is the only value-ref language that structures bodies
+this way, so the check is inert elsewhere). The shadow prune counts all three Dart declarator nodes so
+a method-local `const X` correctly drops a file-scope `const X`. Validated S/M/L (http /
+flame-engine/flame / flutter/packages): node count identical on/off, genuine static consts on real
+source (flame's `cardWidth` 4→15, `tileSize` 3→12; HTTP/2's `Finishing` 1→10), the same bounded
+const-vs-getter name overlap as Kotlin/Scala. **The one caveat is generated code:** the common Dart
+codegen suffixes (`.g.dart` / `.freezed.dart` / `.pb.dart`) are already skipped by `isGeneratedFile`,
+but a header-only-marked generator (a JNIGEN `_bindings.dart` with hundreds of `static final _class`)
+isn't suffix-detected, so it collapses to the file-wide target and dominates a small repo's numbers
+(http) — real source stays clean.
+
+**Pascal / Delphi — the easy path plus the Dart sibling-body fix and a `constant`-only restriction.**
+Pascal keeps shared constants in a `const` section at unit (file) or class scope, and those *already*
+extracted as `constant` (`variableTypes: ['declConst', …]`), so wiring was add-to-`VALUE_REF_LANGS` +
+the shadow prune (`declConst`/`declVar` — a function-local `const X` shadows a unit `const X`). It hit
+the **same reader-scan bug as Dart**: Pascal attaches a proc body (`block`) as a *next sibling* of the
+`declProc` header (the reader scope), both under a `defProc`, so the same sibling-pull fix was extended
+to `block`. The Pascal-specific wrinkle is precision: the Pascal extractor emits function **parameters**
+(`const ATarget: TControl`, `var Dest: …`) and class **fields** as `variable` at the enclosing scope,
+which collapse to noisy file-wide targets — so **Pascal value-ref targets are restricted to
+`constant`** (genuine shared values are `const`; the cost is the rare unit-level `var` global). That
+cleaned the bulk (`var`-param/field FPs gone). A residual minority remains — tree-sitter-pascal
+*context-dependently* misparses a `const` parameter in a complex multi-line Delphi method signature as
+a `declConst` (the `ATarget` case; not reproducible in isolation), a parse-fidelity tail like C++ but
+far smaller. After the fix: a random precision sample on mORMot was 100% TP (font/crypto/DB constants
+referencing each other), castle's top targets are all real FFI binding consts with 0 collisions, and
+the headline is FFI library-name constants — `LazGio2_library = 'libgio-2.0…'` read by **1880**
+`external` declarations (2→1880), mORMot's `LIB_CRYPTO` 1→358. **Caveats:** low same-file density on
+app code (cross-unit reads; horse gave 4 edges), the `const`-only restriction, the rare const-param
+misparse, and Pascal's case-insensitivity (the exact-text reader-scan misses a differently-cased
+reference — a miss, never an FP).
+
+**C++ was attempted and reverted** — the machinery (file/namespace-scope + class `field_declaration`
+extraction) is correct on clean C++, but tree-sitter-cpp's parse fidelity on real template/macro-heavy
+code (and the `.h`→C-grammar routing) leaks class members and parameters to file scope as bogus
+constants. Two guards (skip declarations under an `ERROR` or `compound_statement` ancestor) removed
+~83% of the gross leaks, but the residual pervaded even well-structured library source
+(template-class member leaks, amalgamated mega-headers, `.h`-as-C++). It did not reach the precision
+bar the other languages hold, so it was reverted. Reviving C++ needs prior work on C++ parse handling
+(template-class member scoping, `.h`-as-C++ detection, amalgamated-header exclusion), not a value-refs
+wiring pass. See the playbook's §2b C++ note.
+
+**`tsx` is covered by the TS rows** — excalidraw is a React/.tsx codebase, so the headline
+`tablerIconProps` (1→170) and most of its targets live in `.tsx` files. The one
+tsx-specific path — a const read *only* inside JSX (`<Foo x={CONST}/>`) — relies on the
+reader-scan descending into the JSX subtree; it's locked by a unit test
+(`value-reference-edges.test.ts`), so no separate tsx repo sweep is needed.
+
+**Svelte / Vue / Astro are covered for free** — their extractors re-parse the `<script>` /
+frontmatter block as `typescript` / `javascript`, which are in `VALUE_REF_LANGS`, so a `const`
+in a `.svelte`/`.vue`/`.astro` script edges its readers without any extra work (verified on a
+synthetic `.svelte`). No separate matrix row. See the playbook's coverage tracker (§2b) for the
+full status against the README's language list.
+
+**JavaScript note — CommonJS `require` bindings are targets, and that's correct.** JS edge
+growth (~4–5%) runs higher than TS (~0.7–1.6%) because `var x = require('…')` bindings and
+module-level `var` state pass the distinctive-name gate and are read by same-file functions.
+These are *not* noise: changing such a binding (swap the dependency, reassign the state)
+genuinely affects its readers, so it's a legitimate impact target. Where it overlaps an
+existing `calls` edge, `getImpactRadius` dedups by node — no double-counting. (TS `import`s
+dodge this entirely: they're `import`-kind nodes, not `const`/`var`, so never targets.)
+
+## Agent A/B — what it does and doesn't buy (excalidraw, sonnet/high, 12 runs)
+
+- **Impact API (the win):** `impact` ON vs OFF — `tablerIconProps` 1→170,
+  `COLOR_PALETTE` 15→26, `CaptureUpdateAction` 61→86. This is what `codegraph impact`
+  and CodeGraph Pro's verdict engine consume via `getImpactRadius`.
+- **Agent read-displacement: none — and that's expected.** On an indexed repo the agent
+  answers impact questions in one codegraph call (0 Read / 0 Grep in *both* arms), and it
+  reaches for `codegraph_search` / `callers`, **not** `impact`/`explore`, so it often
+  doesn't query the value-ref edges at all. ON was never worse than OFF. **Do not claim
+  value-refs reduces agent reads** — the win is blast-radius correctness, not fewer turns.
+  (This is the "adapt the tool to the agent" wall: edges only help if the agent calls the
+  edge-traversing tool.)
+
+## Known limitations (intentional)
+
+- **Parameter-only shadowing** is not guarded. The shadow prune counts
+  `variable_declarator`s, so a file-scope const shadowed *only* by a function parameter of
+  the same name would slip through. Not observed in S/M/L TS validation, and guarding it
+  would over-prune legitimate consts whose name coincides with a parameter elsewhere in
+  the file — so it's left unguarded until a real repo surfaces it.
+- **Same-file only.** Cross-file value consumers (a const imported and read elsewhere) are
+  not edged; that needs import/scope resolution and is out of scope.
+- **Reactive/computed reads** (a value read only through a framework getter) have no static
+  identifier to match and aren't covered.
+
+## Extending to another language
+
+The step-by-step runbook — wiring checklist, validation scripts, FP hunts, per-language
+declarator types, and traps — is in
+[`value-reference-edges-playbook.md`](./value-reference-edges-playbook.md). Point a fresh
+session at it and say "Start on language X." In short: decide whether the language's
+constants are file/module-scope (fits) or class-scope (bigger change); confirm the declarator
+node type for the shadow prune; sweep small/medium/large public OSS repos; fix FP clusters;
+add a matrix row here + a test.

+ 7 - 0
src/extraction/languages/c-cpp.ts

@@ -110,6 +110,13 @@ export const cExtractor: LanguageExtractor = {
   nameField: 'declarator',
   bodyField: 'body',
   paramsField: 'parameters',
+  // Const-ness test for a C declaration: true when one of its named children is
+  // a `type_qualifier` whose text is exactly "const". Such file-scope
+  // declarations are extracted as `constant`; plain globals stay `variable`.
+  isConst: (node) => {
+    const isConstQualifier = (c: SyntaxNode) => c.type === 'type_qualifier' && c.text === 'const';
+    return node.namedChildren.some(isConstQualifier);
+  },
   getReturnType: extractCppReturnType,
   resolveTypeAliasKind: (node, _source) => {
     // C typedef: `typedef enum { ... } name;` or `typedef struct { ... } name;`

+ 16 - 0
src/extraction/languages/csharp.ts

@@ -121,6 +121,22 @@ export const csharpExtractor: LanguageExtractor = {
     }
     return false;
   },
+  // C# constants are `const` fields and `static readonly` fields (`MaxItems`,
+  // lookup tables, shared config). Returning true here yields `constant` kind so
+  // value-reference edges can target them; instance `readonly` and plain
+  // `static` fields remain `field`s.
+  isConst: (node) => {
+    // Gather the text of every `modifier` child, then decide from the set.
+    const modifiers = new Set<string>();
+    for (let idx = 0; idx < node.childCount; idx++) {
+      const candidate = node.child(idx);
+      if (candidate?.type === 'modifier') {
+        modifiers.add(candidate.text);
+      }
+    }
+    if (modifiers.has('const')) return true;
+    return modifiers.has('static') && modifiers.has('readonly');
+  },
   isAsync: (node) => {
     for (let i = 0; i < node.childCount; i++) {
       const child = node.child(i);

+ 22 - 0
src/extraction/languages/dart.ts

@@ -133,6 +133,28 @@ export const dartExtractor: LanguageExtractor = {
   callTypes: [],  // Dart calls use identifier+selector, handled via extractBareCall
   variableTypes: [],
   extraClassNodeTypes: ['mixin_declaration', 'extension_declaration'],
+  // Dart's `static_final_declaration` covers exactly the shared-constant idiom:
+  // a top-level or class-`static` `const`/`final`. Emit it as `constant` so
+  // value-reference edges can target it. Instance fields, `var`, and typed
+  // declarations parse as `initialized_identifier` and method-locals as
+  // `initialized_variable_definition`, so neither reaches this branch. The
+  // name is the first `identifier` child; the parent scope (`file:` or
+  // `class:`) comes from the node stack.
+  visitNode: (node, ctx) => {
+    if (node.type !== 'static_final_declaration') return false;
+    const nameNode = node.namedChildren.find((c: SyntaxNode) => c.type === 'identifier');
+    if (nameNode) {
+      // The initializer is the named sibling after the name; keep at most 100
+      // characters of it and append `...` only when something was actually cut.
+      const valueNode = nameNode.nextNamedSibling;
+      const fullValue = valueNode ? getNodeText(valueNode, ctx.source) : '';
+      const signature = fullValue
+        ? `= ${fullValue.slice(0, 100)}${fullValue.length > 100 ? '...' : ''}`
+        : undefined;
+      ctx.createNode('constant', getNodeText(nameNode, ctx.source), node, { signature });
+    }
+    return true;
+  },
   resolveBody: (node, bodyField) => {
     // Dart: function_body is a next sibling of function_signature/method_signature
     if (node.type === 'function_signature' || node.type === 'method_signature') {

+ 13 - 0
src/extraction/languages/java.ts

@@ -86,6 +86,19 @@ export const javaExtractor: LanguageExtractor = {
     }
     return false;
   },
+  // Java constants are `static final` fields (`MAX_ITEMS`, lookup tables, shared
+  // config). Returning true yields `constant` kind so value-reference edges can
+  // target them; instance, `final`-only and `static`-only fields stay `field`s.
+  isConst: (node) => {
+    let modifiersText: string | null = null;
+    for (let idx = 0; idx < node.childCount && modifiersText === null; idx++) {
+      const candidate = node.child(idx);
+      if (candidate?.type === 'modifiers') modifiersText = candidate.text;
+    }
+    if (modifiersText === null) return false;
+    // Both keywords must appear as whole words in the first `modifiers` child.
+    return /\bstatic\b/.test(modifiersText) && /\bfinal\b/.test(modifiersText);
+  },
   extractImport: (node, source) => {
     const importText = source.substring(node.startIndex, node.endIndex).trim();
     const scopedId = node.namedChildren.find((c: SyntaxNode) => c.type === 'scoped_identifier');

+ 45 - 0
src/extraction/languages/kotlin.ts

@@ -85,6 +85,51 @@ export const kotlinExtractor: LanguageExtractor = {
   nameField: 'simple_identifier',
   bodyField: 'function_body',
   visitNode: (node, ctx) => {
+    // Kotlin properties (`val` / `var` / `const val`). The name nests as
+    // property_declaration → variable_declaration → simple_identifier, which the
+    // generic variable/field path can't read — so nothing was extracted before.
+    // Kind by enclosing scope: a singleton `object` / `companion object` (and a
+    // top-level property) holds *shared* values — `val`→`constant`,
+    // `var`→`variable` (the Scala-object rule; a `const val` is a `val`). A
+    // `class`/`interface`/`enum` instance `val`/`var` is per-instance state →
+    // `field` (never a value-ref target, like a Java instance `final`). A
+    // property inside a function body / `init` block / lambda is a local and is
+    // skipped entirely.
+    if (node.type === 'property_declaration') {
+      const varDecl = node.namedChildren.find((c) => c.type === 'variable_declaration');
+      const nameNode = varDecl?.namedChildren.find((c) => c.type === 'simple_identifier');
+      if (!nameNode) return false; // destructuring `val (a,b)` etc. — leave to default
+      const name = getNodeText(nameNode, ctx.source);
+      if (!name) return false;
+
+      // Walk to the nearest enclosing definition: a function body / init / lambda
+      // means it's a local; `object`/`companion object` is a constant scope; a
+      // `class_declaration` (covers class/interface/enum) is an instance scope.
+      let scope: 'local' | 'const' | 'instance' = 'const';
+      for (let p = node.parent; p; p = p.parent) {
+        const pt = p.type;
+        if (
+          pt === 'function_body' || pt === 'function_declaration' ||
+          pt === 'lambda_literal' || pt === 'anonymous_initializer' ||
+          pt === 'control_structure_body' || pt === 'getter' || pt === 'setter'
+        ) { scope = 'local'; break; }
+        if (pt === 'companion_object' || pt === 'object_declaration') { scope = 'const'; break; }
+        if (pt === 'class_declaration') { scope = 'instance'; break; }
+      }
+      if (scope === 'local') return true; // a local — don't extract
+
+      const binding = node.namedChildren.find((c) => c.type === 'binding_pattern_kind');
+      const isVal = binding != null && getNodeText(binding, ctx.source) === 'val';
+      const kind = scope === 'instance' ? 'field' : isVal ? 'constant' : 'variable';
+
+      const typeNode = node.childForFieldName('type');
+      const sig = typeNode
+        ? `${isVal ? 'val' : 'var'} ${name}: ${getNodeText(typeNode, ctx.source)}`
+        : undefined;
+      ctx.createNode(kind, name, node, { signature: sig });
+      return true;
+    }
+
     // Handle Kotlin `fun interface` declarations.
     // Tree-sitter-kotlin doesn't support `fun interface` syntax (Kotlin 1.4+).
     // It produces two different misparse patterns:

+ 23 - 12
src/extraction/languages/scala.ts

@@ -136,18 +136,29 @@ export const scalaExtractor: LanguageExtractor = {
       const name = getValVarName(node, ctx.source);
       if (!name) return false;
 
-      const isInClass = ctx.nodeStack.length > 0 &&
-        (() => {
-          const parentId = ctx.nodeStack[ctx.nodeStack.length - 1];
-          const parentNode = ctx.nodes.find((n) => n.id === parentId);
-          return parentNode != null && (
-            parentNode.kind === 'class' || parentNode.kind === 'trait' ||
-            parentNode.kind === 'interface' || parentNode.kind === 'struct' ||
-            parentNode.kind === 'enum' || parentNode.kind === 'module'
-          );
-        })();
-
-      const kind = isInClass ? 'field' : (t === 'val_definition' ? 'constant' : 'variable');
+      // An `object` is a singleton: its `val`s are shared constants (the Scala
+      // idiom for `static final` — `object Config { val Timeout = 30 }`), so
+      // emit them as `constant`/`variable` like a top-level val, which lets
+      // value-reference edges target them. A `class`/`trait`/`enum`/`given` val
+      // is a per-instance immutable field. Both an `object` and a `class`
+      // extract as `class` kind, so the AST node type of the enclosing
+      // definition — not the parent node's kind — is what distinguishes them.
+      let enclosingDef: string | null = null;
+      for (let p = node.parent; p; p = p.parent) {
+        if (
+          p.type === 'class_definition' || p.type === 'trait_definition' ||
+          p.type === 'enum_definition' || p.type === 'given_definition' ||
+          p.type === 'object_definition'
+        ) {
+          enclosingDef = p.type;
+          break;
+        }
+      }
+      const isInstanceField =
+        enclosingDef === 'class_definition' || enclosingDef === 'trait_definition' ||
+        enclosingDef === 'enum_definition' || enclosingDef === 'given_definition';
+
+      const kind = isInstanceField ? 'field' : (t === 'val_definition' ? 'constant' : 'variable');
       const typeNode = node.childForFieldName('type');
       const sig = typeNode
         ? `${t === 'val_definition' ? 'val' : 'var'} ${name}: ${getNodeText(typeNode, ctx.source)}`

+ 324 - 28
src/extraction/tree-sitter.ts

@@ -151,6 +151,84 @@ function scalaBaseTypeName(node: SyntaxNode | null, source: string): string | nu
   }
 }
 
+/**
+ * Find the identifier a C declarator ultimately declares.
+ *
+ * The `declarator` field of a `declaration` can wrap the name in
+ * `init_declarator` (when a value is present) and in `pointer_declarator` /
+ * `array_declarator` / `parenthesized_declarator`, each exposing the next
+ * level through its own `declarator` field, until an `identifier` is reached.
+ * Returns null for a `function_declarator` (a prototype or function-pointer
+ * variable) and for any other node type, so those are not extracted as variables.
+ */
+function cDeclaratorIdentifier(node: SyntaxNode | null): SyntaxNode | null {
+  let current: SyntaxNode | null = node;
+  // Bounded descent: give up after 12 hops rather than trust the tree shape.
+  for (let hops = 0; current && hops < 12; hops++) {
+    const nodeType = current.type;
+    if (nodeType === 'identifier') return current;
+    // Only these wrappers keep the name one level down in their `declarator`
+    // field. Anything else — including `function_declarator` (a prototype or
+    // function-pointer variable) — is not a plain variable declaration.
+    const isWrapper =
+      nodeType === 'init_declarator' ||
+      nodeType === 'pointer_declarator' ||
+      nodeType === 'array_declarator' ||
+      nodeType === 'parenthesized_declarator';
+    if (!isWrapper) return null;
+    current = getChildByField(current, 'declarator');
+  }
+  return null;
+}
+
+/** Returns the first `simple_identifier` found in `node`'s subtree, scanning
+ * level by level (FIFO) and examining at most 40 nodes. Resolves Swift's nested
+ * property name (`property_declaration → <name> pattern → bound_identifier →
+ * simple_identifier`) and the bound name of a Kotlin/Swift property declarator
+ * for the shadow prune. A tuple pattern (`let (a, b)`) yields its first name. */
+function firstSimpleIdentifier(node: SyntaxNode | null): SyntaxNode | null {
+  if (!node) return null;
+  const pending: SyntaxNode[] = [node];
+  for (let head = 0; head < pending.length && head < 40; head++) {
+    const current = pending[head]!;
+    if (current.type === 'simple_identifier') return current;
+    for (let i = 0; i < current.namedChildCount; i++) {
+      const child = current.namedChild(i);
+      if (child) pending.push(child);
+    }
+  }
+  return null;
+}
+
+/** Swift property summary: the bound-name node, whether the binding keyword is
+ * `let`, and whether it is *computed* (no stored value — never a constant). */
+function swiftPropertyInfo(
+  node: SyntaxNode,
+  source: string,
+): { nameNode: SyntaxNode | null; isLet: boolean; isComputed: boolean } {
+  const children = node.namedChildren;
+  const typeIs = (...types: string[]) => (c: SyntaxNode) => types.indexOf(c.type) !== -1;
+  // Prefer the `name` field; otherwise use the first binding/pattern child.
+  const nameHost =
+    getChildByField(node, 'name') ?? children.find(typeIs('value_binding_pattern', 'pattern')) ?? null;
+  const binding = children.find(typeIs('value_binding_pattern'));
+  const bindingText = binding ? getNodeText(binding, source).trimStart() : '';
+  const isLet = bindingText.startsWith('let');
+  const isComputed = children.some(typeIs('computed_property', 'protocol_property_requirements'));
+  return { nameNode: firstSimpleIdentifier(nameHost), isLet, isComputed };
+}
+
+/** Reports whether `node` has a `function_definition` ancestor — i.e. it sits
+ * inside a C function (a local) rather than at file/namespace scope. */
+function hasFunctionAncestor(node: SyntaxNode): boolean {
+  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
+    if (ancestor.type === 'function_definition') {
+      return true;
+    }
+  }
+  return false;
+}
+
 /**
  * PHP type-position wrapper node kinds (a type-hint is `named_type`,
  * `?Foo` is `optional_type`, `A|B` is `union_type`, `A&B` is
@@ -224,11 +302,12 @@ export class TreeSitterExtractor {
   // Value-reference edges (default ON; set CODEGRAPH_VALUE_REFS=0 to disable; see flushValueRefs).
   // Same-file reads of file-scope const/var symbols → `references` edges so impact analysis catches
   // value consumers ("change this constant/table, affect its readers").
-  private static readonly VALUE_REF_LANGS = new Set<string>(['typescript', 'javascript', 'tsx']);
+  private static readonly VALUE_REF_LANGS = new Set<string>(['typescript', 'javascript', 'tsx', 'go', 'python', 'rust', 'ruby', 'c', 'java', 'csharp', 'php', 'scala', 'kotlin', 'swift', 'dart', 'pascal']);
   private static readonly MAX_VALUE_REF_NODES = 20_000;
   private readonly valueRefsEnabled = process.env.CODEGRAPH_VALUE_REFS !== '0';
   private fileScopeValues = new Map<string, string>();
-  private valueRefScopes: Array<{ id: string; node: SyntaxNode }> = [];
+  private fileScopeValueCounts = new Map<string, number>(); // file-scope nodes per name (conditional-def detection)
+  private valueRefScopes: Array<{ id: string; node: SyntaxNode; name: string }> = [];
   private errors: ExtractionError[] = [];
   private extractor: LanguageExtractor | null = null;
   private nodeStack: string[] = []; // Stack of parent node IDs
@@ -531,12 +610,36 @@ export class TreeSitterExtractor {
    * scopes whose bodies flushValueRefs scans.
    */
   private captureValueRefScope(kind: NodeKind, name: string, id: string, node: SyntaxNode): void {
-    if ((kind === 'constant' || kind === 'variable') && name.length >= 3 && /[A-Z_]/.test(name)) {
+    // Pascal targets `constant` only: its extractor emits function PARAMETERS
+    // (`Dest: TBufferWriter`) and class fields (`declField`) as `variable` at the
+    // enclosing scope, which would otherwise become noisy targets (a param name
+    // shared across many procs collapses to one file-wide target). Genuine
+    // Pascal shared values are `const` (`constant`), so restrict to that. (Unit
+    // `var` globals are the rare cost; the parameter/field noise dominates.)
+    const targetKindOk =
+      this.language === 'pascal' ? kind === 'constant' : kind === 'constant' || kind === 'variable';
+    if (targetKindOk && name.length >= 3 && /[A-Z_]/.test(name)) { // names under 3 chars, or with no uppercase letter/underscore, are never targets
       const parentId = this.nodeStack[this.nodeStack.length - 1];
-      if (parentId?.startsWith('file:')) this.fileScopeValues.set(name, id);
+      // file-scope OR class/module/struct/enum-scope constants are targets.
+      // Class/module scope matters for languages (Ruby) that keep nearly all
+      // constants inside a class or module; struct/enum scope matters for Swift,
+      // which namespaces shared constants in `struct`/`enum` (`enum Constants {
+      // static let X }`). Readers are same-file methods of that type.
+      if (
+        parentId &&
+        (parentId.startsWith('file:') || parentId.startsWith('class:') ||
+          parentId.startsWith('module:') || parentId.startsWith('struct:') ||
+          parentId.startsWith('enum:'))
+      ) {
+        this.fileScopeValues.set(name, id); // NOTE(review): keyed by bare name — a later same-named target in this file (e.g. in another class) overwrites this id; confirm that is acceptable
+        // How many target nodes carry this name. A conditional def
+        // (`try: X = a; except: X = b`) makes >1 — distinct from a local shadow,
+        // which adds a binding the prune must catch (see flushValueRefs).
+        this.fileScopeValueCounts.set(name, (this.fileScopeValueCounts.get(name) ?? 0) + 1);
+      }
     }
     if (kind === 'function' || kind === 'method' || kind === 'constant' || kind === 'variable') {
-      this.valueRefScopes.push({ id, node });
+      this.valueRefScopes.push({ id, node, name }); // `name` rides along so flushValueRefs can skip reads of a same-named target
     }
   }
 
@@ -551,32 +654,95 @@ export class TreeSitterExtractor {
   private flushValueRefs(): void {
     const scopes = this.valueRefScopes;
     const targets = this.fileScopeValues;
+    const fileScopeCounts = this.fileScopeValueCounts;
     this.valueRefScopes = [];
     this.fileScopeValues = new Map();
+    this.fileScopeValueCounts = new Map();
     if (!this.valueRefsEnabled || !TreeSitterExtractor.VALUE_REF_LANGS.has(this.language)) return;
     if (targets.size === 0 || scopes.length === 0 || isGeneratedFile(this.filePath)) return;
 
-    // Prune SHADOWED targets. A name bound more than once in the file (e.g. a
-    // bundled/Emscripten `const Module` re-declared as an inner `var Module` /
-    // function param) resolves to the INNER binding for nested readers, so a
-    // file-scope edge to it is a false positive. Those inner re-declarations
-    // aren't extracted as graph nodes, so detect them at the syntax level:
-    // count `variable_declarator` names across the tree and drop any target
-    // bound twice or more. Single-binding (unambiguous) names are kept. This
-    // complements the path-based isGeneratedFile() check for content-minified
-    // bundles it can't catch by suffix.
+    // Prune SHADOWED targets. A target re-bound in an INNER scope (a
+    // bundled/Emscripten `const Module` re-declared as a nested `var Module`; a
+    // Go package `const Timeout` shadowed by a local `Timeout := …`; a Python
+    // module `CONFIG` shadowed by a local `CONFIG = …`) resolves to the inner
+    // binding for nested readers, so a file-scope edge is a false positive.
+    // Inner re-bindings aren't graph nodes, so detect them at the syntax level:
+    // count every declarator of the name across the tree and compare against how
+    // many FILE-SCOPE nodes carry it. A real shadow makes (declarators >
+    // file-scope nodes) — the excess is the local binding. A conditional
+    // module-level def (`try: X = a; except: X = b`) makes them EQUAL (both
+    // declarators are file-scope nodes), so it's correctly kept. Complements the
+    // path-based isGeneratedFile() check, which can't catch content-minified
+    // bundles.
+    //
+    // Declarator node types are per-grammar; a file only contains its own
+    // language's nodes, so matching all of them in one switch is safe.
     if (this.tree) {
       const declCounts = new Map<string, number>();
+      const bump = (nameNode: SyntaxNode | null) => {
+        // `simple_identifier` is Kotlin's name node (a property declarator's name).
+        if (nameNode && (nameNode.type === 'identifier' || nameNode.type === 'simple_identifier')) {
+          const nm = getNodeText(nameNode, this.source);
+          if (targets.has(nm)) declCounts.set(nm, (declCounts.get(nm) ?? 0) + 1);
+        }
+      };
       const dstack: SyntaxNode[] = [this.tree.rootNode];
       let dvisited = 0;
       while (dstack.length > 0 && dvisited < TreeSitterExtractor.MAX_VALUE_REF_NODES) {
         const n = dstack.pop()!;
         dvisited++;
-        if (n.type === 'variable_declarator') {
-          const nameNode = n.namedChild(0);
-          if (nameNode && nameNode.type === 'identifier') {
-            const nm = getNodeText(nameNode, this.source);
-            if (targets.has(nm)) declCounts.set(nm, (declCounts.get(nm) ?? 0) + 1);
+        switch (n.type) {
+          case 'variable_declarator': // TS/JS/tsx
+          case 'const_spec':          // Go  `const X = …`
+          case 'var_spec':            // Go  `var X = …`
+            bump(n.namedChild(0));
+            break;
+          case 'const_item':          // Rust  `const X: T = …`
+          case 'static_item':         // Rust  `static X: T = …`
+            bump(getChildByField(n, 'name'));
+            break;
+          case 'let_declaration':       // Rust  `let x = …` (locals — the shadow source)
+          case 'short_var_declaration': // Go    `x, Y := …`
+          case 'assignment': {          // Python `X = …` / `X: T = …` / `A, B = …`
+            const left = getChildByField(n, 'left') ?? getChildByField(n, 'pattern') ?? n.namedChild(0);
+            if (left?.type === 'identifier') bump(left);
+            else if (left) for (const c of left.namedChildren) bump(c);
+            break;
+          }
+          case 'init_declarator':       // C  `T X = …` (file-scope const AND the local that shadows it)
+            bump(cDeclaratorIdentifier(n));
+            break;
+          case 'val_definition':        // Scala  `val X = …` (object/top-level const AND a method-local that shadows it)
+          case 'var_definition': {      // Scala  `var X = …`
+            const pat = getChildByField(n, 'pattern');
+            if (pat?.type === 'identifier') bump(pat);
+            break;
+          }
+          case 'static_final_declaration':         // Dart  top-level/`static` `const`/`final` (the target itself)
+          case 'initialized_identifier':           // Dart  instance field / `var`
+          case 'initialized_variable_definition': { // Dart  a method-local `const`/`final`/`var` that shadows a const
+            const id = n.namedChildren.find((c) => c.type === 'identifier');
+            if (id) bump(id);
+            break;
+          }
+          case 'declConst':  // Pascal  unit/class `const` (the target itself) AND a function-local `const` that shadows it
+          case 'declVar': {  // Pascal  a function-local `var` that shadows a const
+            bump(getChildByField(n, 'name'));
+            break;
+          }
+          case 'property_declaration': { // Kotlin / Swift  `val`/`let X = …` (object/static const AND a method-local that shadows it)
+            // Kotlin: variable_declaration → simple_identifier; Swift: a `pattern`
+            // (`<name>` field) → simple_identifier. Resolve either shape.
+            const vd = n.namedChildren.find((c) => c.type === 'variable_declaration');
+            const id = vd
+              ? vd.namedChildren.find((c) => c.type === 'simple_identifier')
+              : firstSimpleIdentifier(
+                  getChildByField(n, 'name') ??
+                    n.namedChildren.find((c) => c.type === 'value_binding_pattern' || c.type === 'pattern') ??
+                    null,
+                );
+            if (id) bump(id);
+            break;
           }
         }
         for (let i = 0; i < n.namedChildCount; i++) {
@@ -584,20 +750,46 @@ export class TreeSitterExtractor {
           if (c) dstack.push(c);
         }
       }
-      for (const [nm, c] of declCounts) if (c > 1) targets.delete(nm);
+      for (const [nm, c] of declCounts) if (c > (fileScopeCounts.get(nm) ?? 1)) targets.delete(nm);
       if (targets.size === 0) return;
     }
 
     for (const scope of scopes) {
       const seen = new Set<string>();
       const stack: SyntaxNode[] = [scope.node];
+      // Dart and Pascal attach a function/method BODY as a *next sibling* of the
+      // signature node that is stored as the reader scope (Dart `method_signature`
+      // ← `function_body`; Pascal `declProc` ← `block`, both under a `defProc`),
+      // not as a child — so the scope subtree is just the signature and the reads
+      // live in the sibling. Pull it in. (A body as a next sibling of the scope
+      // node is unique to Dart/Pascal among the value-ref languages — every other
+      // grammar nests the body inside the function node — so this is inert
+      // elsewhere.)
+      const sib = scope.node.nextNamedSibling;
+      if (sib && (sib.type === 'function_body' || sib.type === 'block')) stack.push(sib);
       let visited = 0;
       while (stack.length > 0 && visited < TreeSitterExtractor.MAX_VALUE_REF_NODES) {
         const n = stack.pop()!;
         visited++;
-        if (n.type === 'identifier') {
-          const targetId = targets.get(getNodeText(n, this.source));
-          if (targetId && targetId !== scope.id && !seen.has(targetId)) {
+        // `constant` covers Ruby, where both a constant's definition and its
+        // references are `constant`-typed nodes, not `identifier`. `name` covers
+        // PHP, where a constant reference — bare `MAX_ITEMS` or the const half of
+        // `self::MAX_ITEMS` / `Foo::MAX_ITEMS` — is a `name` node (a `$var` local
+        // is a `variable_name`, a different namespace, so it can never shadow a
+        // bare constant — no prune wiring needed). `simple_identifier` covers
+        // Kotlin, whose every name reference (a const read included) is that
+        // node type. Safe across languages: a file only holds its own grammar's
+        // nodes; `name` is PHP-only and `simple_identifier` is Kotlin-only here.
+        if (
+          n.type === 'identifier' || n.type === 'constant' ||
+          n.type === 'name' || n.type === 'simple_identifier'
+        ) {
+          const refName = getNodeText(n, this.source);
+          const targetId = targets.get(refName);
+          // Skip self and same-name targets: a symbol referencing a file-scope
+          // sibling of its own name (the two halves of a conditional `try: X=…;
+          // except: X=…`) is never a meaningful value read.
+          if (targetId && targetId !== scope.id && refName !== scope.name && !seen.has(targetId)) {
             seen.add(targetId);
             this.edges.push({
               source: scope.id,
@@ -750,8 +942,15 @@ export class TreeSitterExtractor {
       skipChildren = true;
     }
     // Check for variable declarations (const, let, var, etc.)
-    // Only extract top-level variables (not inside functions/methods)
-    else if (this.extractor.variableTypes.includes(nodeType) && !this.isInsideClassLikeNode()) {
+    // Only extract top-level variables (not inside functions/methods) — plus
+    // class/module-scope CONSTANTS, which Ruby (and other const-in-class
+    // languages) keep almost exclusively inside a class/module. A Ruby `CONST =
+    // …` has a `constant`-typed LHS; other languages don't put one here, so this
+    // is effectively Ruby-only and doesn't disturb their class-internal locals.
+    else if (
+      this.extractor.variableTypes.includes(nodeType) &&
+      (!this.isInsideClassLikeNode() || this.isClassScopeConstantAssignment(node))
+    ) {
       this.extractVariable(node);
       // extractVariable doesn't walk every initializer shape (object literals
       // are deliberately skipped; Python/Ruby don't walk at all), so scan the
@@ -775,6 +974,21 @@ export class TreeSitterExtractor {
       this.isInsideClassLikeNode()
     ) {
       const ownerId = this.nodeStack[this.nodeStack.length - 1];
+      // A `static let`/`static var` member is a SHARED constant of the type
+      // (Swift's `static`-namespacing idiom, esp. in `enum`/`struct`) — extract
+      // it as `constant`/`variable` so value-reference edges can target it. An
+      // instance stored property stays a `field` (per-instance; Swift instance
+      // properties otherwise aren't own nodes — that's unchanged). A *computed*
+      // property (getter, no stored value) is never a constant — skip the node.
+      const { nameNode, isLet, isComputed } = swiftPropertyInfo(node, this.source);
+      if (nameNode && !isComputed) {
+        const isStatic = this.extractor.isStatic?.(node) ?? false;
+        this.createNode(isStatic ? (isLet ? 'constant' : 'variable') : 'field',
+          getNodeText(nameNode, this.source), node, {
+            visibility: this.extractor.getVisibility?.(node),
+            isStatic,
+          });
+      }
       if (ownerId) {
         this.extractDecoratorsFor(node, ownerId);
         this.extractVariableTypeAnnotation(node, ownerId);
@@ -1060,6 +1274,18 @@ export class TreeSitterExtractor {
     );
   }
 
+  /**
+   * Detects a Ruby-style constant assignment (`CONST = …`): an `assignment`
+   * node whose left-hand side is a `constant`-typed node. Such a class/module
+   * (or top-level) constant is worth extracting as a symbol even when it sits
+   * inside a class. Per the grammars this extractor targets, other languages
+   * are not expected to give an assignment a `constant`-typed LHS, so the gate
+   * is effectively Ruby-only.
+   *
+   * @param node - Candidate declaration/assignment node.
+   * @returns `true` only when `node.type` is `assignment` and its LHS node has
+   *   type `constant`; `false` otherwise (including when no LHS is found).
+   */
+  private isClassScopeConstantAssignment(node: SyntaxNode): boolean {
+    if (node.type !== 'assignment') return false;
+    const left = getChildByField(node, 'left') ?? node.namedChild(0); // use the `left` field; fall back to the first named child when that lookup yields null/undefined
+    return left?.type === 'constant';
+  }
+
   /**
    * Extract a function
    */
@@ -1519,6 +1745,17 @@ export class TreeSitterExtractor {
     const visibility = this.extractor.getVisibility?.(node);
     const isStatic = this.extractor.isStatic?.(node) ?? false;
 
+    // A class field that is actually a CONSTANT (Java `static final`, C# `const`
+    // / `static readonly`) is extracted as `constant` kind, not `field`, so
+    // value-reference edges treat it as a target (the gate accepts
+    // constant/variable, not field). Scoped to languages whose `isConst`
+    // predicate is field-shaped — other languages' fields stay `field`.
+    const fieldKind: NodeKind =
+      (this.language === 'java' || this.language === 'csharp') &&
+      (this.extractor.isConst?.(node) ?? false)
+        ? 'constant'
+        : 'field';
+
     // Java field_declaration: "private final String name = value;" → variable_declarator(s) are direct children
     // C# field_declaration: wraps in variable_declaration → variable_declarator(s)
     let declarators = node.namedChildren.filter(
@@ -1579,7 +1816,7 @@ export class TreeSitterExtractor {
         if (!nameNode) continue;
         const name = getNodeText(nameNode, this.source);
         const signature = typeText ? `${typeText} ${name}` : name;
-        const fieldNode = this.createNode('field', name, decl, {
+        const fieldNode = this.createNode(fieldKind, name, decl, {
           docstring,
           signature,
           visibility,
@@ -1603,7 +1840,7 @@ export class TreeSitterExtractor {
         || node.namedChildren.find(c => c.type === 'identifier');
       if (nameNode) {
         const name = getNodeText(nameNode, this.source);
-        this.createNode('field', name, node, {
+        this.createNode(fieldKind, name, node, {
           docstring,
           visibility,
           isStatic,
@@ -1813,7 +2050,9 @@ export class TreeSitterExtractor {
       const left = getChildByField(node, 'left') || node.namedChild(0);
       const right = getChildByField(node, 'right') || node.namedChild(1);
 
-      if (left && left.type === 'identifier') {
+      // Ruby constant assignments (`MAX = 3`) have a `constant`-typed LHS, not
+      // `identifier`; without this they were never extracted as symbols at all.
+      if (left && (left.type === 'identifier' || left.type === 'constant')) {
         const name = getNodeText(left, this.source);
         // Skip if name starts with lowercase and looks like a function call result
         // Python constants are usually UPPER_CASE
@@ -1903,6 +2142,63 @@ export class TreeSitterExtractor {
         const initSignature = initValue ? `= ${initValue}${initValue.length >= 100 ? '...' : ''}` : undefined;
         this.createNode(kind, name, nameNode, { docstring, signature: initSignature, isExported });
       });
+    } else if (this.language === 'c') {
+      // C: a `declaration` node's name nests inside the `declarator` field —
+      // `init_declarator` (with value) or bare/pointer/array declarators (no
+      // value); a `function_declarator` is a prototype, not a variable. The
+      // generic fallback below only finds a *direct* identifier child, which C
+      // never has, so file-scope consts/globals went unextracted entirely (and
+      // so had no impact-radius edges). Only file-scope declarations are tracked
+      // — locals inside a function body are skipped (a `static const` table read
+      // by same-file functions is the value the impact graph wants, not every
+      // block-local). C allows several declarators per declaration
+      // (`int a = 1, b = 2;`), so iterate them.
+      if (!hasFunctionAncestor(node)) {
+        for (let i = 0; i < node.namedChildCount; i++) {
+          const child = node.namedChild(i);
+          if (!child) continue;
+          // Accept only `init_declarator` (has a value) and pointer/array
+          // declarators. A *bare* `identifier` declarator is deliberately
+          // skipped: an unknown leading macro (`CURL_EXTERN`, `XXH_PUBLIC_API`)
+          // makes tree-sitter-c misparse a prototype `MACRO RetType fn(args);`
+          // as a declaration whose "variable" is the bare return-type
+          // identifier, splitting `fn(args)` off as a bogus expression — minting
+          // a spurious type-named global for every macro-prefixed prototype in a
+          // header. Those misparses are always bare identifiers; real
+          // consts/tables always carry an initializer. The only legit loss is
+          // uninitialized scalar globals (`static int g;`).
+          if (
+            child.type !== 'init_declarator' &&
+            child.type !== 'pointer_declarator' &&
+            child.type !== 'array_declarator'
+          ) {
+            continue;
+          }
+          const nameNode = cDeclaratorIdentifier(child);
+          if (!nameNode) continue;
+          const name = getNodeText(nameNode, this.source);
+          if (!name) continue;
+          const valueNode =
+            child.type === 'init_declarator' ? getChildByField(child, 'value') : null;
+          const initValue = valueNode ? getNodeText(valueNode, this.source).slice(0, 100) : undefined;
+          const initSignature = initValue
+            ? `= ${initValue}${initValue.length >= 100 ? '...' : ''}`
+            : undefined;
+          this.createNode(kind, name, child, { docstring, signature: initSignature, isExported });
+        }
+      }
+    } else if (this.language === 'swift') {
+      // Swift top-level property (`let X = …` / `var Y = …`). The name nests in
+      // a `pattern`, which the generic fallback can't read, so top-level Swift
+      // constants/globals went unextracted. A top-level `let`→`constant`,
+      // `var`→`variable`; a computed property (getter, no value) is skipped.
+      const { nameNode, isLet, isComputed } = swiftPropertyInfo(node, this.source);
+      if (nameNode && !isComputed) {
+        this.createNode(isLet ? 'constant' : 'variable', getNodeText(nameNode, this.source), node, {
+          docstring,
+          isExported,
+        });
+      }
     } else {
       // Generic fallback for other languages
       // Try to find identifier children