пре 5 дана · 798cd0e21c
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
				 
			
 
				 ### New Features
			
 
				 
			
 
				+- `codegraph_explore` now surfaces the right code in large multi-layer projects. When you ask a backend-flow question in a repo that pairs an API server with a big frontend that mirrors the same domain words — say an `app/` admin UI sitting over an `api/` server — the server-side file that genuinely matches several of your query's terms is no longer pushed out of the results by the larger, more interconnected frontend layer. A file that is itself a search root or graph-central hub and is corroborated by two or more distinct query terms is now ranked ahead of the graph-connectivity signal, so it stays in the answer even when a denser unrelated layer would otherwise crowd it out, and "how does X read items / handle the request" returns the service or handler that does the work instead of a wall of frontend views. Single-layer projects are unaffected; set `CODEGRAPH_RANK_NO_MULTITERM=1` to revert to the previous ranking.
			
 
				 - Impact and blast-radius analysis for TypeScript, JavaScript, Go, Python, Rust, Ruby, C, Java, C#, PHP, Scala, Kotlin, Swift, Dart, and Pascal/Delphi now understands the readers of a constant. When you change a file-scope, package-level, module-level, or class-level constant — a config object, a lookup table, a shared constant — the other symbols in that file that read it now show up as affected, where before they were invisible (impact only followed calls, imports, and inheritance, so a constant's consumers looked like "nothing depends on this"). This makes `codegraph impact`, and the impact trail in `codegraph_explore`/`codegraph_node`, catch the "change this table, break its readers" class of change. It's on by default and adds no nodes to your graph; bundled/minified files and ambiguously-shadowed names are skipped to keep results precise. Set `CODEGRAPH_VALUE_REFS=0` to turn it off.
			
 
				 - C file-scope constants and globals — `static const` scalars, pointer/array lookup tables, and shared mutable globals — are now recognized as symbols in their own right. They previously weren't extracted at all, so they never appeared in search or carried any dependents; now they show up in `codegraph search` and participate in impact analysis (see above), so changing a C lookup table surfaces the same-file functions that read it.
			
 
				 - Java `static final` constants, C# `const` / `static readonly` constants, Scala `object` vals, and Kotlin top-level / `object` / `companion object` `val`s are now classified as constants rather than generic fields, so they participate in the constant-reader impact analysis above — change a `public static final` table, a `const string`, a Scala `object Config { val Timeout = … }`, or a Kotlin `companion object { const val … }` and the methods that read it now show up as affected. (Per-object Java `final` / C# `readonly` / Scala & Kotlin `class` instance properties are unchanged.) Kotlin constants were previously not indexed as their own symbols at all, so they now also appear in `codegraph search`.
			
--- a/__tests__/explore-corroboration-ranking.test.ts
+++ b/__tests__/explore-corroboration-ranking.test.ts
@@ -0,0 +1,121 @@
 
				+/**
			
 
				+ * codegraph_explore — multi-term corroboration tier (cross-layer monorepo ranking).
			
 
				+ *
			
 
				+ * BEHAVIOURAL coverage for the `isCorroborated` tier in handleExplore's file sort:
			
 
				+ * a backend file that is BOTH an entry/central file AND matched by >=2 DISTINCT
			
 
				+ * query terms must be surfaced (rendered as a `#### <path>` source section) for a
			
 
				+ * backend-flow query in a multi-layer repo — not displaced by a denser frontend
			
 
				+ * layer. The tier exists because explore's primary file ranker is graph-centrality
			
 
				+ * (Random-Walk-with-Restart) mass, which — seeded from text matches that skew to
			
 
				+ * the bigger, internally dense layer — can bury a query-matching backend file under
			
 
				+ * an off-topic cluster. The entry/central GUARD keeps the tier safe: an INCIDENTAL
			
 
				+ * multi-term file that is neither entry nor central is NOT promoted, so it cannot
			
 
				+ * displace a graph-central answer file (the regression a blunt hits-only tier caused
			
 
				+ * on excalidraw, where `binding.ts`/`elbowArrow.ts` displaced `renderNewElementScene`).
			
 
				+ *
			
 
				+ * NOTE: the full directus-scale burial (where frontend RWR mass exceeds a
			
 
				+ * query-matching backend file) is an EMERGENT property of thousands of real frontend
			
 
				+ * symbols — a self-contained fixture can't reach the cluster size past
			
 
				+ * findRelevantContext's retrieval cap. That regression is isolated by the
			
 
				+ * deterministic ranking harness on real indexes (directus/n8n/excalidraw), where the
			
 
				+ * api/ service moves from "absent/mentioned" to "sourced" with no control regression.
			
 
				+ * These tests lock the user-visible behaviour the tier guarantees on a fixture.
			
 
				+ */
			
 
				+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
			
 
				+import * as fs from 'fs';
			
 
				+import * as path from 'path';
			
 
				+import * as os from 'os';
			
 
				+import CodeGraph from '../src/index';
			
 
				+import { ToolHandler } from '../src/mcp/tools';
			
 
				+
			
 
				+/** Paths that explore rendered as full-body `#### <path> —` source sections. */
			
 
				/**
				 * Collects the paths that explore rendered as full-body source sections.
				 *
				 * A source section is any output line that starts with `#### `, followed by
				 * the path, followed by ` —`. The path is taken up to the FIRST ` —` on the
				 * line and returned with surrounding whitespace removed.
				 *
				 * @param text Raw explore output, one section heading per line.
				 * @returns The section paths in the order they appear in `text`.
				 */
				function sourcedFiles(text: string): string[] {
				  // Non-global pattern, so `exec` keeps no state between lines.
				  const sectionHeading = /^#### (.+?) —/;
				  return text.split('\n').flatMap((row) => {
				    const hit = sectionHeading.exec(row);
				    return hit ? [hit[1].trim()] : [];
				  });
				}
			
 
				+
			
 
				describe('codegraph_explore — multi-term corroboration tier', () => {
				  // Env switch that disables the tier under test (tools.ts compares it to '1').
				  const TIER_SWITCH = 'CODEGRAPH_RANK_NO_MULTITERM';

				  let testDir: string;
				  let cg: CodeGraph;
				  let handler: ToolHandler;
				  // Value of TIER_SWITCH before each test, so afterEach can put it back.
				  let savedTierSwitch: string | undefined;

				  beforeEach(async () => {
				    // Always exercise the tier at its default (enabled) setting: a value
				    // exported in the developer's or CI shell must not silently turn off the
				    // behaviour these tests are meant to lock.
				    savedTierSwitch = process.env[TIER_SWITCH];
				    delete process.env[TIER_SWITCH];

				    testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-corrob-'));

				    // --- The large, internally DENSE frontend layer ---------------------------
				    // Many `app/` files whose SYMBOLS all match the word "item" and form a tight
				    // call mesh, so Random-Walk-with-Restart mass (seeded from those text matches)
				    // concentrates here. They are NOT the answer to a backend query — but at scale
				    // their cluster mass out-ranks the call-isolated backend file.
				    // "item" is a PATH token (app/item/...) so FTS (token-based, not substring)
				    // retrieves every file for the query term "item" — matching directus's `app/`
				    // tree where "item" is a real path/symbol token, not a camelCase fragment.
				    const appItem = path.join(testDir, 'app', 'item');
				    fs.mkdirSync(appItem, { recursive: true });
				    const N = 30;
				    for (let i = 0; i < N; i++) {
				      // Ring neighbours: every file imports and calls the file before and after it.
				      const next = (i + 1) % N;
				      const prev = (i + N - 1) % N;
				      // snake_case so FTS tokenizes "item" out of the symbol name (camelCase would
				      // leave `itemview0` as a single unmatchable token).
				      fs.writeFileSync(path.join(appItem, `view${i}.ts`),
				        `import { item_view_${next} } from './view${next}';\n` +
				        `import { item_view_${prev} } from './view${prev}';\n` +
				        `export function item_view_${i}() {\n` +
				        `  return item_view_${next}() + item_view_${prev}();\n` +
				        `}\n`);
				    }

				    // --- The small, call-ISOLATED backend file (the answer) -------------------
				    // Its PATH matches TWO distinct query terms (api/item/service.ts → item +
				    // service), so it IS a search root (an entry file) with file-term-hits >=2 —
				    // but its generic SYMBOLS don't text-match, and nothing in the frontend mesh
				    // calls it, so it gets no RWR inflow and its restart mass is diluted across the
				    // large frontend seed set. This is the directus shape: ItemsService is
				    // search-relevant by name/path yet call-isolated from the frontend seed cluster.
				    // The corroboration tier (path/name matches >=2 query terms AND it's an entry
				    // file) is what guarantees it stays in.
				    const apiItem = path.join(testDir, 'api', 'item');
				    fs.mkdirSync(apiItem, { recursive: true });
				    fs.writeFileSync(path.join(apiItem, 'service.ts'),
				      `export class DataService {\n` +
				      `  read() { return this.load(); }\n` +
				      `  load(): string[] { return []; }\n` +
				      `}\n`);

				    cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
				    await cg.indexAll();
				    handler = new ToolHandler(cg);
				  });

				  afterEach(() => {
				    try {
				      if (cg) cg.destroy();
				      if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true });
				    } finally {
				      // Restore the caller's environment even if cleanup above throws.
				      // `delete` (not assigning undefined) because process.env stringifies
				      // assigned values, which would leave the literal string "undefined".
				      if (savedTierSwitch === undefined) {
				        delete process.env[TIER_SWITCH];
				      } else {
				        process.env[TIER_SWITCH] = savedTierSwitch;
				      }
				    }
				  });

				  it('sources the corroborated backend file alongside a denser frontend cluster in a multi-layer repo', async () => {
				    const res = await handler.execute('codegraph_explore', { query: 'item service' });
				    const text = res.content[0].text;
				    const sourced = sourcedFiles(text);

				    // The backend service — matched by item+service and a search root — must
				    // be rendered, not truncated out by the frontend mesh's graph mass.
				    expect(sourced).toContain('api/item/service.ts');
				  });

				  it('still leads with the backend file when the query names its symbol directly', async () => {
				    // A query naming the backend symbol directly: the answer is the DataService
				    // file; the frontend mesh stays subordinate (it matches only "item").
				    const res = await handler.execute('codegraph_explore', { query: 'DataService read load' });
				    const text = res.content[0].text;
				    const sourced = sourcedFiles(text);
				    expect(sourced).toContain('api/item/service.ts');
				    // The named backend file leads — it is not displaced by the frontend layer.
				    expect(sourced[0]).toBe('api/item/service.ts');
				  });
				});
			
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -2327,6 +2327,26 @@ export class ToolHandler {
 
				       if (n) namedSeedFiles.add(n.filePath);
			
 
				     }
			
 
				 
			
 
				+    // Multi-term corroboration tier: a file that is BOTH (a) an entry/central file
			
 
				+    // (a search root, named seed, or graph-central hub — i.e. structurally part of
			
 
				+    // the answer) AND (b) matched by ≥2 DISTINCT query terms must not be buried by
			
 
				+    // graph-centrality mass that accrued to a denser-but-off-topic cluster. In a
			
 
				+    // cross-layer monorepo (an API server alongside a much larger, internally dense
			
 
				+    // frontend that mirrors the same domain words) the Random-Walk-with-Restart mass
			
 
				+    // — seeded from text matches that skew to the bigger layer — floats hits=0
			
 
				+    // frontend files above the hits=2/3 backend service that IS the answer (its many
			
 
				+    // callers don't help: it's call-isolated from the frontend seed cluster). The
			
 
				+    // entry/central GUARD keeps this safe: an INCIDENTAL multi-term file that is
			
 
				+    // neither entry nor central (a type/util file that matches "element"+x but isn't
			
 
				+    // the flow) is NOT promoted, so it can't displace the graph-central answer file
			
 
				+    // (hits=1) the way a blunt hits-only tier would. Single-layer repos with one
			
 
				+    // cluster are unaffected (no competing mass). Set CODEGRAPH_RANK_NO_MULTITERM=1
			
 
				+    // to disable.
			
 
				+    const MULTITERM_OFF = process.env.CODEGRAPH_RANK_NO_MULTITERM === '1';
			
 
				+    const isCorroborated = (fp: string) =>
			
 
				+      !MULTITERM_OFF &&
			
 
				+      (fileTermHits.get(fp) ?? 0) >= 2 &&
			
 
				+      (entryFiles.has(fp) || centralFiles.has(fp));
			
 
				     const sortedFiles = relevantFiles.sort((a, b) => {
			
 
				       const aPath = a[0].toLowerCase();
			
 
				       const bPath = b[0].toLowerCase();
			
@@ -2336,6 +2356,11 @@ export class ToolHandler {
 
				       const bNamed = namedSeedFiles.has(b[0]) ? 1 : 0;
			
 
				       if (aNamed !== bNamed) return bNamed - aNamed;
			
 
				 
			
 
				+      // Corroborated (entry/central + ≥2 terms) tier, above the graph signal.
			
 
				+      const aCorr = isCorroborated(a[0]) ? 1 : 0;
			
 
				+      const bCorr = isCorroborated(b[0]) ? 1 : 0;
			
 
				+      if (aCorr !== bCorr) return bCorr - aCorr;
			
 
				+
			
 
				       // Graph connectivity is the next key (small epsilon so near-ties fall
			
 
				       // through to the text signal rather than coin-flipping on float noise).
			
 
				       const aG = fileGraphScore.get(a[0]) ?? 0;