3 недель назад · bb534d574c
--- a/scripts/agent-eval/probe-sweep.mjs
+++ b/scripts/agent-eval/probe-sweep.mjs
@@ -0,0 +1,119 @@
 
															+#!/usr/bin/env node
														
 
															+// probe-sweep — direct MCP test across N repos × N tools, no claude needed.
														
 
															+//
														
 
															+// Measures response characteristics (size, sections present, signals fired)
														
 
															+// for each (repo, query) pair against the built dist/. Sub-second per probe;
														
 
															+// the full sweep below runs in ~10-30s vs hours for a real claude audit.
														
 
															+//
														
 
															+// Use this to iterate on backend changes rapidly: change tools.ts /
														
 
															+// context-builder, npm run build, re-run probe-sweep, compare. Once a
														
 
															+// change looks good on probe metrics, run a focused claude audit for the
														
 
															+// few repos that matter to confirm end-to-end cost behavior.
														
 
															+//
														
 
															+// Usage: node scripts/agent-eval/probe-sweep.mjs [--tool=context|explore|trace] [--repos=a,b,c]
														
 
															+import { pathToFileURL } from 'node:url';
														
 
															+import { resolve } from 'node:path';
														
 
															+
														
 
															+const args = Object.fromEntries(
														
 
															+  process.argv.slice(2).map(a => a.startsWith('--') ? a.slice(2).split('=') : [a, true])
														
 
															+);
														
 
															+const TOOL = args.tool ?? 'context';
														
 
															+
														
 
															+const load = (rel) => import(pathToFileURL(resolve(rel)).href);
														
 
															+const idx = await load('dist/index.js');
														
 
															+const tools = await load('dist/mcp/tools.js');
														
 
															+const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
														
 
															+const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
														
 
															+
														
 
															+// Each entry: id (label matched by --repos and printed in the table), repo
															+// (path handed to CodeGraph.openSync) and q (the query sent to the tool). The query is the same prompt
															+// used in the real claude audits, so probe output is directly comparable to the agent's would-be input.
															+const SWEEP = [
															+  // Small realworld template repos (the loss cases from the cross-language sweep)
															+  { id: 'gin-rw',        repo: '/tmp/codegraph-corpus/gin-realworld',         q: 'How does this Gin app route a request through its middleware chain to a handler?' },
															+  { id: 'go-mux',        repo: '/tmp/codegraph-corpus/go-mux',                q: 'How does this gorilla/mux app route a request to its handler?' },
															+  { id: 'fastapi-rw',    repo: '/tmp/codegraph-corpus/fastapi-realworld',     q: 'How does FastAPI route a request through its dependencies to a handler?' },
															+  { id: 'spring-pc',     repo: '/tmp/codegraph-corpus/spring-petclinic',      q: 'How does Spring route an HTTP request to a controller method?' },
															+  { id: 'axum-rw',       repo: '/tmp/codegraph-corpus/rust-axum-realworld',   q: 'How does Axum route a request to its handler in this app?' },
															+  { id: 'express-rw',    repo: '/tmp/codegraph-corpus/express-realworld',     q: 'How does this Express app route a request through middleware to a handler?' },
															+  { id: 'kotlin-pc',     repo: '/tmp/codegraph-corpus/kotlin-petclinic',      q: 'How does the Kotlin Spring app route an HTTP request to its handler?' },
															+  { id: 'flask-mb',      repo: '/tmp/codegraph-corpus/flask-microblog',       q: 'How does this Flask app route a request to a view function?' },
															+  { id: 'vapor-tpl',     repo: '/tmp/codegraph-corpus/vapor-template',        q: 'How does Vapor route an HTTP request to its handler?' },
															+  { id: 'cpp-leveldb',   repo: '/tmp/codegraph-corpus/cpp-leveldb',           q: 'How does LevelDB handle a Put operation through to disk?' },
															+  { id: 'lualine',       repo: '/tmp/codegraph-corpus/lualine.nvim',          q: 'How does lualine assemble and render the statusline?' },
															+  { id: 'drupal-admin',  repo: '/tmp/codegraph-corpus/drupal-admintoolbar',   q: 'How does the Drupal admin toolbar module render its toolbar?' },
															+  { id: 'svelte-rw',     repo: '/tmp/codegraph-corpus/svelte-realworld',      q: 'How does this SvelteKit app route a request to a handler?' },
															+  { id: 'react-rw',      repo: '/tmp/codegraph-corpus/react-realworld',       q: 'How does this React app fetch and display articles?' },
															+  { id: 'rails-rw',      repo: '/tmp/codegraph-corpus/rails-realworld',       q: 'How does Rails route a request to a controller action?' },
															+  { id: 'flask-rest',    repo: '/tmp/codegraph-corpus/flask-restful-realworld', q: 'How does Flask-RESTful route a request to a resource method?' },
															+  { id: 'laravel-rw',    repo: '/tmp/codegraph-corpus/laravel-realworld',     q: 'How does Laravel route a request to the controller method?' },
															+  { id: 'aspnet-rw',     repo: '/tmp/codegraph-corpus/aspnet-realworld',      q: 'How does ASP.NET route a request to the controller action?' },
															+  // The iter7 wins/ties (kept in the sweep to make sure we don't regress)
															+  { id: 'cobra',         repo: '/tmp/codegraph-corpus/cobra',                 q: 'How does cobra parse commands and flags?' },
															+  { id: 'sinatra',       repo: '/tmp/codegraph-corpus/sinatra',               q: 'How does sinatra route a request to its handler?' },
															+  { id: 'slim',          repo: '/tmp/codegraph-corpus/slim',                  q: 'How does slim route a request and apply middleware?' },
															+];
														
 
															+
														
 
															+// Detect signals in response text — the sections/markers our backend levers add, which otherwise
															+// only show up downstream as "agent ran X more tool calls". Returns one boolean per signal.
															+const detect = (text) => ({
															+  hasEntryPoints: /^### Entry Points/m.test(text), // /m: heading must start a line
															+  hasRelatedSymbols: /^### Related Symbols/m.test(text),
															+  hasFlowTrace: /^## Inline flow trace/m.test(text),
															+  hasRouteManifest: /^## Routing manifest/m.test(text),
															+  hasTopHandler: /^### Top handler file/m.test(text),
															+  hasSmallRepoTail: /This project is small/.test(text), // unanchored: matches anywhere in the text
															+});
														
 
															+
														
 
															+const filterRepos = args.repos ? new Set(String(args.repos).split(',')) : null;
														
 
															+const subjects = SWEEP.filter(s => !filterRepos || filterRepos.has(s.id));
														
 
															+
														
 
															+const t0 = Date.now();
														
 
															+const rows = [];
														
 
															+for (const s of subjects) {
														
 
															+  try {
														
 
															+    const cg = CodeGraph.openSync(s.repo);
														
 
															+    const handler = new ToolHandler(cg);
														
 
															+    const t1 = Date.now();
														
 
															+    const res = await handler.execute('codegraph_' + TOOL,
														
 
															+      TOOL === 'context' ? { task: s.q } :
														
 
															+      TOOL === 'explore' ? { query: s.q } : { from: 'main', to: 'main' });
														
 
															+    const text = res.content?.[0]?.text ?? '';
														
 
															+    const signals = detect(text);
														
 
															+    rows.push({
														
 
															+      id: s.id,
														
 
															+      ms: Date.now() - t1,
														
 
															+      chars: text.length,
														
 
															+      lines: text.split('\n').length,
														
 
															+      ...signals,
														
 
															+    });
														
 
															+    try { cg.close?.(); } catch {}
														
 
															+  } catch (e) {
														
 
															+    rows.push({ id: s.id, error: String(e).slice(0, 80) });
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+// Pretty-print as a compact table.
														
 
															+const fmt = (r) =>
														
 
															+  r.error
														
 
															+    ? `  ${r.id.padEnd(13)} ERROR: ${r.error}`
														
 
															+    : `  ${r.id.padEnd(13)} ${String(r.chars).padStart(6)}c ${String(r.lines).padStart(4)}L ${String(r.ms).padStart(4)}ms` +
														
 
															+      ` ${r.hasEntryPoints ? 'EP ' : '   '}` +
														
 
															+      `${r.hasFlowTrace ? 'TRC ' : '    '}` +
														
 
															+      `${r.hasRouteManifest ? 'MAN ' : '    '}` +
														
 
															+      `${r.hasTopHandler ? 'HND ' : '    '}` +
														
 
															+      `${r.hasSmallRepoTail ? 'TAIL' : '    '}`;
														
 
															+console.log(`=== probe-sweep tool=${TOOL} n=${subjects.length} (${Date.now() - t0}ms total) ===`);
														
 
															+console.log('  id            chars  lines    ms signals');
														
 
															+console.log('  ' + '-'.repeat(56));
														
 
															+for (const r of rows) console.log(fmt(r));
														
 
															+
														
 
															+// Sum + medians for the size pillar
														
 
															+const sizes = rows.filter(r => !r.error).map(r => r.chars);
														
 
															+sizes.sort((a, b) => a - b);
														
 
															+const median = sizes[Math.floor(sizes.length / 2)];
														
 
															+const sum = sizes.reduce((a, b) => a + b, 0);
														
 
															+console.log(`  ${'-'.repeat(64)}`);
														
 
															+console.log(`  median=${median}c  total=${sum}c  ` +
														
 
															+  `manifest=${rows.filter(r => r.hasRouteManifest).length}/${rows.filter(r => !r.error).length}  ` +
														
 
															+  `top-handler=${rows.filter(r => r.hasTopHandler).length}/${rows.filter(r => !r.error).length}`);
														
--- a/src/db/queries.ts
+++ b/src/db/queries.ts
@@ -209,6 +209,8 @@ export class QueryBuilder {
 
															     getAllFilePaths?: SqliteStatement;
														
 
															     getAllNodeNames?: SqliteStatement;
														
 
															     getDominantFile?: SqliteStatement;
														
 
															+    getTopRouteFile?: SqliteStatement;
														
 
															+    getRoutingManifest?: SqliteStatement;
														
 
															   } = {};
														
 
															   constructor(db: SqliteDatabase) {
														
@@ -564,6 +566,110 @@ export class QueryBuilder {
 
															     };
														
 
															   }
														
 
															+  /**
															+   * Find the file that holds the densest concentration of the project's
															+   * `route` nodes (framework-emitted: Express/Gin/Flask/Rails/Drupal/etc.).
															+   * Added so handleContext could inline a small repo's routing config when
															+   * the query is about request flow — eliminating the "Glob + Read
															+   * routes.rb" pattern that beats codegraph on tiny realworld template repos.
															+   * NOTE(review): the handleContext hunk in this change calls
															+   * getRoutingManifest instead — confirm this method still has a caller.
															+   *
															+   * Files rejected by `isLowValueFile` (test/generated) are excluded both
															+   * from candidacy and from the total. Returns null when no candidate
															+   * remains, when the top file holds fewer than 3 routes, or when it holds
															+   * under 30% of the counted routes (diffuse routing → no single answer file).
															+   *
															+   * @returns The top file's path, its route count, and the total number of
															+   *   routes across all non-excluded files; or null as described above.
															+   */
															+  getTopRouteFile(): { filePath: string; routeCount: number; totalRoutes: number } | null {
															+    if (!this.stmts.getTopRouteFile) {
															+      // No LIMIT: the low-value filter runs in JS afterwards, so capping the
															+      // rows here would undercount `totalRoutes` (inflating the 30% ratio)
															+      // and could hide real route files behind test files. The file_path
															+      // tie-breaker makes the pick deterministic when counts are equal.
															+      this.stmts.getTopRouteFile = this.db.prepare(`
															+        SELECT file_path, COUNT(*) AS cnt
															+        FROM nodes
															+        WHERE kind = 'route'
															+        GROUP BY file_path
															+        ORDER BY cnt DESC, file_path ASC
															+      `);
															+    }
															+    const rows = this.stmts.getTopRouteFile.all() as Array<{ file_path: string; cnt: number }>;
															+    const filtered = rows.filter(r => !isLowValueFile(r.file_path));
															+    // Rows are sorted by count, so the first survivor is the densest file.
															+    const top = filtered[0];
															+    // top.cnt >= 3 also guarantees at least 3 routes in total.
															+    if (!top || top.cnt < 3) return null;
															+    const totalRoutes = filtered.reduce((sum, r) => sum + r.cnt, 0);
															+    if (top.cnt / totalRoutes < 0.30) return null;
															+    return { filePath: top.file_path, routeCount: top.cnt, totalRoutes };
															+  }
														
 
															+
														
 
															+  /**
															+   * Build a URL → handler manifest from the index. Each route node is
															+   * joined to the function/method/class node it points at through a
															+   * `references` or `calls` edge, so the agent gets the canonical routing
															+   * answer ("POST /users/login → AuthController#login") without having to
															+   * parse the framework's route DSL itself.
															+   *
															+   * Also returns the file with the most handler endpoints — used as the
															+   * "top handler file" to inline source for, so the agent has both the
															+   * mapping AND the handler implementations.
															+   *
															+   * Handlers in files rejected by `isLowValueFile` (test/generated) are
															+   * dropped. `totalRoutes`, `topHandlerFile` and `topHandlerFileCount` are
															+   * computed over ALL remaining route→handler pairs; only `entries` is
															+   * capped by `limit`.
															+   *
															+   * @param limit - Maximum number of entries to return (default 40). A
															+   *   negative or non-finite value means "no cap".
															+   * @returns The manifest, or null when fewer than 3 route→handler pairs
															+   *   remain after filtering.
															+   */
															+  getRoutingManifest(limit: number = 40): {
															+    entries: Array<{ url: string; handler: string; handlerFile: string; handlerLine: number; handlerKind: string }>;
															+    topHandlerFile: string | null;
															+    topHandlerFileCount: number;
															+    totalRoutes: number;
															+  } | null {
															+    if (!this.stmts.getRoutingManifest) {
															+      // Edge kind varies across framework resolvers: Spring/Rails/
															+      // Laravel/Drupal emit `references`, Express emits `calls`. Accept
															+      // both — the semantic is the same (route → its handler).
															+      //
															+      // GROUP BY collapses a pair linked by more than one edge into a single
															+      // row. There is no SQL LIMIT: the low-value filter runs in JS, so the
															+      // cap is applied afterwards; otherwise the totals would describe a
															+      // truncated sample and valid routes past the cap could be missed.
															+      this.stmts.getRoutingManifest = this.db.prepare(`
															+        SELECT
															+          r.name AS url,
															+          h.name AS handler,
															+          h.file_path AS handler_file,
															+          h.start_line AS handler_line,
															+          h.kind AS handler_kind
															+        FROM nodes r
															+        JOIN edges e ON e.source = r.id
															+        JOIN nodes h ON e.target = h.id
															+        WHERE r.kind = 'route'
															+          AND e.kind IN ('references', 'calls')
															+          AND h.kind IN ('function', 'method', 'class')
															+        GROUP BY r.id, h.id
															+        ORDER BY r.file_path, r.start_line, h.file_path, h.start_line
															+      `);
															+    }
															+    const rows = this.stmts.getRoutingManifest.all() as Array<{
															+      url: string; handler: string; handler_file: string; handler_line: number; handler_kind: string;
															+    }>;
															+    // Drop test/generated handlers — same hygiene as elsewhere.
															+    const filtered = rows.filter(r => !isLowValueFile(r.handler_file));
															+    if (filtered.length < 3) return null;
															+    // Identify the file holding the most handlers (the "primary handler file").
															+    const fileCounts = new Map<string, number>();
															+    for (const r of filtered) {
															+      fileCounts.set(r.handler_file, (fileCounts.get(r.handler_file) ?? 0) + 1);
															+    }
															+    let topHandlerFile: string | null = null;
															+    let topHandlerFileCount = 0;
															+    // Strict `>` keeps the first file seen on ties (rows arrive in query order).
															+    for (const [file, count] of fileCounts) {
															+      if (count > topHandlerFileCount) {
															+        topHandlerFile = file;
															+        topHandlerFileCount = count;
															+      }
															+    }
															+    // Negative/non-finite limits previously meant "unbounded" to SQLite's LIMIT; keep that.
															+    const cap = Number.isFinite(limit) && limit >= 0 ? Math.floor(limit) : filtered.length;
															+    return {
															+      entries: filtered.slice(0, cap).map(r => ({
															+        url: r.url,
															+        handler: r.handler,
															+        handlerFile: r.handler_file,
															+        handlerLine: r.handler_line,
															+        handlerKind: r.handler_kind,
															+      })),
															+      topHandlerFile,
															+      topHandlerFileCount,
															+      totalRoutes: filtered.length,
															+    };
															+  }
														
 
															+
														
 
															   /**
														
 
															    * Get all nodes of a specific kind
														
 
															    */
														
--- a/src/index.ts
+++ b/src/index.ts
@@ -683,6 +683,33 @@ export class CodeGraph {
 
															     return this.queries.searchNodes(query, options);
														
 
															   }
														
 
															+  /**
															+   * Find the project's "primary route file" — the file with the densest
															+   * concentration of framework-emitted `route` nodes (≥3 routes, ≥30%
															+   * of the non-test routes counted by the query layer). Intended for
															+   * inlining the routing config in `codegraph_context` responses on small
															+   * realworld template repos (rails-realworld, laravel-realworld,
															+   * drupal-admintoolbar, …) where Glob+Read of `routes.rb`/`urls.py`/etc.
															+   * otherwise beats codegraph.
															+   *
															+   * Thin delegate to `this.queries.getTopRouteFile()`.
															+   *
															+   * @returns The selected file path with its route count and the total
															+   *   route count, or null when no file qualifies.
															+   */
															+  getTopRouteFile(): { filePath: string; routeCount: number; totalRoutes: number } | null {
															+    return this.queries.getTopRouteFile();
															+  }
														
 
															+
														
 
															+  /**
															+   * Build a URL → handler routing manifest from the index. Each entry
															+   * pairs a route node with the function/method/class node it points at
															+   * through a `references` or `calls` edge (framework resolvers differ in
															+   * which of the two they emit). Returns null when fewer than 3 entries
															+   * with a non-test handler file are found.
															+   *
															+   * Thin delegate to `this.queries.getRoutingManifest(limit)`.
															+   *
															+   * @param limit - Optional cap forwarded to the query layer, which
															+   *   defaults it to 40 when omitted.
															+   * @returns The manifest entries plus the file holding the most handlers
															+   *   and the associated counts, or null as described above.
															+   */
															+  getRoutingManifest(limit?: number): {
															+    entries: Array<{ url: string; handler: string; handlerFile: string; handlerLine: number; handlerKind: string }>;
															+    topHandlerFile: string | null;
															+    topHandlerFileCount: number;
															+    totalRoutes: number;
															+  } | null {
															+    return this.queries.getRoutingManifest(limit);
															+  }
														
 
															+
														
 
															   // ===========================================================================
														
 
															   // Edge Operations
														
 
															   // ===========================================================================
														
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -21,11 +21,13 @@ import {
 
															   lstatSync,
														
 
															   openSync,
														
 
															   readFileSync,
														
 
															+  statSync,
														
 
															   writeSync,
														
 
															 } from 'fs';
														
 
															 import { clamp, validatePathWithinRoot, validateProjectPath } from '../utils';
														
 
															 import { isGeneratedFile } from '../extraction/generated-detection';
														
 
															 import { tmpdir } from 'os';
														
 
															+import * as pathModule from 'path';
														
 
															 import { join, resolve as resolvePath } from 'path';
														
 
															 /** Maximum output length to prevent context bloat (characters) */
														
@@ -1167,18 +1169,80 @@ export class ToolHandler {
 
															     // is structurally the same as cobra's; both deserve the same
														
 
															     // sufficiency steering.
														
 
															     let smallRepoTail = '';
														
 
															+    let smallRepoRouteInline = '';
														
 
															     if (isTinyRepo || isSmallRepo) {
														
 
															+      // Iter12: backend-computed routing manifest for routing queries.
														
 
															+      // Builds a URL → handler map directly from the graph (each route
														
 
															+      // node has a `references` edge to its handler), then inlines the
														
 
															+      // top handler file's source. The agent gets the canonical
														
 
															+      // routing answer in one MCP call — no need to parse framework
														
 
															+      // DSL or grep for handlers.
														
 
															+      //
														
 
															+      // Replaces iter10's raw route-file inline. The manifest is more
														
 
															+      // information-dense (parsed URL→handler map vs raw config DSL)
														
 
															+      // and we still inline the top handler file's source so the agent
														
 
															+      // has the implementation bodies inline too.
														
 
															+      const isRouteQuery = /\b(route|routes|routing|request|handler|endpoint|api|controller|middleware|dispatch|invok)/i.test(task);
														
 
															+      if (isRouteQuery) {
														
 
															+        try {
														
 
															+          const manifest = cg.getRoutingManifest(40);
														
 
															+          if (manifest) {
														
 
															+            // 1) Compact URL→handler list (~30-60 lines, ~1-2KB).
														
 
															+            const lines: string[] = [
														
 
															+              `\n\n## Routing manifest (${manifest.totalRoutes} routes, top handler file holds ${manifest.topHandlerFileCount})`,
														
 
															+              '',
														
 
															+              '| URL | Handler | Location |',
														
 
															+              '|---|---|---|',
														
 
															+            ];
														
 
															+            for (const e of manifest.entries) {
														
 
															+              lines.push(`| \`${e.url}\` | \`${e.handler}\` | ${e.handlerFile}:${e.handlerLine} |`);
														
 
															+            }
														
 
															+            // 2) Inline the top handler file's source.
														
 
															+            if (manifest.topHandlerFile && manifest.topHandlerFileCount >= 2) {
														
 
															+              try {
														
 
															+                const fullPath = pathModule.join(cg.getProjectRoot(), manifest.topHandlerFile);
														
 
															+                const stat = statSync(fullPath);
														
 
															+                if (stat.size > 0 && stat.size <= 16000) {
														
 
															+                  const source = readFileSync(fullPath, 'utf-8');
														
 
															+                  const capped = source.length > 7000 ? source.slice(0, 7000) + '\n... (truncated)' : source;
														
 
															+                  const ext = (manifest.topHandlerFile.match(/\.([a-z]+)$/i)?.[1] || '').toLowerCase();
														
 
															+                  const lang =
														
 
															+                    ext === 'rb' ? 'ruby' : ext === 'py' ? 'python' :
														
 
															+                    ext === 'go' ? 'go' : ext === 'rs' ? 'rust' :
														
 
															+                    ext === 'js' || ext === 'jsx' ? 'javascript' :
														
 
															+                    ext === 'ts' || ext === 'tsx' ? 'typescript' :
														
 
															+                    ext === 'java' ? 'java' : ext === 'kt' ? 'kotlin' :
														
 
															+                    ext === 'cs' ? 'csharp' : ext === 'php' ? 'php' :
														
 
															+                    ext === 'swift' ? 'swift' : ext === 'yml' || ext === 'yaml' ? 'yaml' : '';
														
 
															+                  lines.push('');
														
 
															+                  lines.push(`### Top handler file (\`${manifest.topHandlerFile}\` — ${manifest.topHandlerFileCount}/${manifest.totalRoutes} routes, full source inlined — do NOT Read)`);
														
 
															+                  lines.push('');
														
 
															+                  lines.push('```' + lang);
														
 
															+                  lines.push(capped);
														
 
															+                  lines.push('```');
														
 
															+                }
														
 
															+              } catch { /* file read failed, skip the source inline */ }
														
 
															+            }
														
 
															+            smallRepoRouteInline = lines.join('\n');
														
 
															+          }
														
 
															+        } catch {
														
 
															+          // Manifest build failed — drop silently
														
 
															+        }
														
 
															+      }
														
 
															       const sizeQualifier = isTinyRepo ? 'under 150' : 'under 500';
														
 
															-      smallRepoTail = `\n\n---\n> **This project is small** (${sizeQualifier} indexed files). The entry points and code above cover the relevant surface — **do NOT call codegraph_explore as a follow-up; its content will largely duplicate this response**. If you need a specific flow, call \`codegraph_trace from→to\`. If you need one specific symbol's body, call \`codegraph_node <name>\`. Otherwise, answer from what is above.`;
														
 
															+      const routingClause = smallRepoRouteInline
														
 
															+        ? ' The URL→handler manifest and top handler file are also inlined above — answer routing questions from them.'
														
 
															+        : '';
														
 
															+      smallRepoTail = `\n\n---\n> **This project is small** (${sizeQualifier} indexed files). The entry points and code above cover the relevant surface — **do NOT call codegraph_explore as a follow-up; its content will largely duplicate this response**. If you need a specific flow, call \`codegraph_trace from→to\`. If you need one specific symbol's body, call \`codegraph_node <name>\`.${routingClause} Otherwise, answer from what is above.`;
														
 
															     }
														
 
															     // buildContext returns string when format is 'markdown'
														
 
															     if (typeof context === 'string') {
														
 
															-      return this.textResult(this.truncateOutput(context + flowTrace + reminder + smallRepoTail));
														
 
															+      return this.textResult(this.truncateOutput(context + flowTrace + reminder + smallRepoRouteInline + smallRepoTail));
														
 
															     }
														
 
															     // If it returns TaskContext, format it
														
 
															-    return this.textResult(this.truncateOutput(this.formatTaskContext(context) + flowTrace + reminder + smallRepoTail));
														
 
															+    return this.textResult(this.truncateOutput(this.formatTaskContext(context) + flowTrace + reminder + smallRepoRouteInline + smallRepoTail));
														
 
															   }
														
 
															   /**