пре 15 часа · b45f309a1b
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
				 - CodeGraph now follows C/C++ commands that are dispatched through macro-built function-pointer tables, so the handler functions they reach are no longer dead-ends in the graph. Many C projects register a handler into a struct's function-pointer field through a macro and a generated table — redis is the classic case: every command (`getCommand`, `decrbyCommand`, …) is wired into the command struct's `proc` field by a `MAKE_CMD(…)` table that lives in a generated, `#include`-d file, then invoked as `c->cmd->proc(c)`. CodeGraph now reads those macro-built tables — including ones whose struct type is itself a macro alias, whose table sits in an `#include`-d file that is never indexed on its own, or that are wrapped in conditional compilation (`#ifdef`) and defined inline with the struct. It recognizes function-pointer fields declared through a function typedef, and follows the receiver — a chained access (`c->cmd->proc`) or an array subscript through a file-scope table (`(cmdnames[i].cmd_func)(…)`) — across field types. It also follows dispatch through a bare array of function pointers with no struct wrapper at all — the opcode/handler-table pattern common in interpreters and emulators, where a table like `opcodes[op](…)` invokes one of many registered handler functions by index — linking the dispatcher to every handler in the array. The upshot: asking for the callers or blast radius of a command handler now finds the dispatcher that reaches it. For redis, `call` shows up as a caller of every command; for SQLite, the builtin SQL functions registered through `FUNCTION(...)` link to where they're invoked; for Vim, every `:ex` and normal-mode command links from the dispatcher. (#991, extending #932)
			
 
				 - CodeGraph no longer times out when many agents query it at once. The shared background server that serves all your editor and agent sessions used to run every query on a single thread, so a burst of concurrent requests — for example a swarm of subagents exploring a large monorepo together — queued up behind one another and, while the heavy ones ran, froze the connection so finished answers couldn't even be sent back until the whole batch drained. Past a handful of simultaneous callers that routinely surfaced as MCP request timeouts. The shared server now answers queries across a pool of worker threads, so concurrent requests run in parallel and the connection stays responsive the whole time; when it's genuinely saturated a call returns a brief "busy, retry shortly" note (not an error) instead of hanging past your client's timeout. The pool sizes itself to your machine — roughly one worker per core, leaving one for coordination — and a single editor session is unaffected (no pool, no overhead). Set `CODEGRAPH_QUERY_POOL_SIZE` to choose a specific number of workers, or `0` to revert to single-threaded in-process queries.
			
 
				 
			
 
				+### Fixes
			
 
				+
			
 
				+- Claude Code's front-load prompt hook now fires for non-English prompts. The optional hook that injects CodeGraph context for structural questions only recognized English keywords, so a structural question written in Chinese — or any non-Latin-script language — silently injected nothing: the hook looked like it wasn't wired up despite a correct setup, with no error to explain why. The gate is now language-aware. It recognizes Chinese structural keywords (如何/流程/调用/依赖/实现/架构…), and — in any language — a prompt that names a real code symbol from your project, such as `getUserId`, `article_publish`, `user.login`, or `parseToken()` (the name is checked against the index, so an ordinary word that merely looks like code doesn't trigger it). Non-structural prompts ("fix this typo", in any language) stay a no-op as before, so nothing fires where there's no structural answer to give. Thanks @whinc for the detailed report and repro. (#994)
			
 
				+
			
 
				 
			
 
				 ## [1.1.1] - 2026-06-24
			
 
				 
			
--- a/__tests__/frontload-hook.test.ts
+++ b/__tests__/frontload-hook.test.ts
@@ -12,7 +12,7 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
 
				 import * as fs from 'fs';
			
 
				 import * as os from 'os';
			
 
				 import * as path from 'path';
			
 
				-import { planFrontload, findIndexedSubprojectRoots } from '../src/directory';
			
 
				+import { planFrontload, findIndexedSubprojectRoots, isStructuralPrompt, hasStructuralKeyword, extractCodeTokens } from '../src/directory';
			
 
				 
			
 
				 /** Make `dir` look indexed (isInitialized needs `.codegraph/codegraph.db`). */
			
 
				 function mkIndexed(dir: string): string {
			
@@ -128,3 +128,76 @@ describe('findIndexedSubprojectRoots', () => {
 
				     expect(findIndexedSubprojectRoots(tmp, { maxDepth: 2 })).toEqual([]);
			
 
				   });
			
 
				 });
			
 
				+
			
 
				+describe('hasStructuralKeyword — keyword signal fires the hook directly (#994)', () => {
			
 
				+  it('English keywords match, with `\\b` so "flow" ≠ "flower"', () => {
			
 
				+    const structural = [
			
 
				+      'how does article publish work',
			
 
				+      'where is the token validated',
			
 
				+      'trace the request flow',
			
 
				+      'what calls parseToken',
			
 
				+    ];
			
 
				+    for (const prompt of structural) expect(hasStructuralKeyword(prompt)).toBe(true);
			
 
				+    // "flower" only contains "flow"; the word boundary has to reject it.
			
 
				+    expect(hasStructuralKeyword('water the flower')).toBe(false);
			
 
				+  });
			
 
				+
			
 
				+  it('Chinese keywords match WITHOUT `\\b` — the #994 fix (were silently dropped)', () => {
			
 
				+    const structural = [
			
 
				+      '介绍文章发布流程', // introduce / flow
			
 
				+      '登录是如何实现的', // how / implement
			
 
				+      '这个函数的调用链', // call (chain)
			
 
				+      '支付模块依赖哪些服务', // depend
			
 
				+    ];
			
 
				+    for (const prompt of structural) expect(hasStructuralKeyword(prompt)).toBe(true);
			
 
				+    // "fix this typo" — carries no structural keyword.
			
 
				+    expect(hasStructuralKeyword('修复这个拼写错误')).toBe(false);
			
 
				+  });
			
 
				+
			
 
				+  it('a bare code-token is NOT a keyword — it needs graph verification', () => {
			
 
				+    const tokenOnly = ['看看 get_user 这段逻辑', 'I really love JavaScript'];
			
 
				+    for (const prompt of tokenOnly) expect(hasStructuralKeyword(prompt)).toBe(false);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+describe('extractCodeTokens — candidate symbols the hook verifies against the graph', () => {
			
 
				+  it('pulls camelCase / PascalCase / snake_case / call / member tokens', () => {
			
 
				+    // [prompt, token that must be among the extracted candidates]
			
 
				+    const expected: Array<[string, string]> = [
			
 
				+      ['prepareArticlePublish 的调用链', 'prepareArticlePublish'], // camelCase
			
 
				+      ['看看 get_user 这段逻辑', 'get_user'], // snake_case
			
 
				+      ['render() 在哪触发', 'render'], // call form
			
 
				+      ['看看 UserService', 'UserService'], // PascalCase class kept
			
 
				+    ];
			
 
				+    for (const [prompt, token] of expected) expect(extractCodeTokens(prompt)).toContain(token);
			
 
				+    // member access contributes both sides of the dot
			
 
				+    expect(extractCodeTokens('user.login 做了什么').sort()).toEqual(['login', 'user']);
			
 
				+  });
			
 
				+
			
 
				+  it('a tech brand is extracted as a CANDIDATE — the hook’s graph check is what rejects it', () => {
			
 
				+    // #994 follow-up: a brand name is identifier-shaped, so it comes back as a
			
 
				+    // candidate here; the hook only fires if it is a real symbol in the index.
			
 
				+    const javascript = extractCodeTokens('I really love JavaScript');
			
 
				+    expect(javascript).toEqual(['JavaScript']);
			
 
				+    const hosts = extractCodeTokens('thoughts on GitHub vs GitLab');
			
 
				+    expect(hosts.sort()).toEqual(['GitHub', 'GitLab']);
			
 
				+  });
			
 
				+
			
 
				+  it('ordinary prose and doc/data filenames yield no tokens', () => {
			
 
				+    const inert = [
			
 
				+      'fix typo in readme',
			
 
				+      'fix the typo in README.md', // doc filename excluded
			
 
				+      'bump the version in package.json',
			
 
				+      'water the flower',
			
 
				+    ];
			
 
				+    for (const prompt of inert) expect(extractCodeTokens(prompt)).toEqual([]);
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+describe('isStructuralPrompt — cheap candidate gate (keyword OR code-token)', () => {
			
 
				+  it('fires on a keyword prompt in any language', () => {
			
 
				+    expect(isStructuralPrompt('how does article publish work')).toBe(true);
			
 
				+    expect(isStructuralPrompt('介绍文章发布流程')).toBe(true);
			
 
				+  });
			
 
				+
			
 
				+  it('fires on a code-token prompt with no keyword', () => {
			
 
				+    // Every prompt here must be keyword-free: if a keyword sneaks in (e.g. "where"),
			
 
				+    // the keyword branch alone satisfies the gate and the code-token branch is
			
 
				+    // never exercised. The first assertion pins that premise explicitly.
			
 
				+    const tokenOnly = ['看看 get_user 这段逻辑', 'prepareArticlePublish 定义', 'user.login 做了什么'];
			
 
				+    for (const prompt of tokenOnly) {
			
 
				+      expect(hasStructuralKeyword(prompt)).toBe(false);
			
 
				+      expect(isStructuralPrompt(prompt)).toBe(true);
			
 
				+    }
			
 
				+  });
			
 
				+
			
 
				+  it('a tech brand passes the CHEAP gate as a candidate — the hook then graph-verifies it', () => {
			
 
				+    // Layering, not a bug: isStructuralPrompt is shape-only, so a token-shaped brand
			
 
				+    // is a candidate here; the hook rejects it as a non-symbol (proven by the CLI e2e).
			
 
				+    expect(isStructuralPrompt('I really love JavaScript')).toBe(true);
			
 
				+  });
			
 
				+
			
 
				+  it('non-structural prose stays a no-op — in either language', () => {
			
 
				+    expect(isStructuralPrompt('fix typo in readme')).toBe(false);
			
 
				+    expect(isStructuralPrompt('修复这个拼写错误')).toBe(false);
			
 
				+    expect(isStructuralPrompt('water the flower')).toBe(false);
			
 
				+    expect(isStructuralPrompt('')).toBe(false);
			
 
				+  });
			
 
				+});
			
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -26,7 +26,7 @@
 
				 import { Command } from 'commander';
			
 
				 import * as path from 'path';
			
 
				 import * as fs from 'fs';
			
 
				-import { getCodeGraphDir, isInitialized, unsafeIndexRootReason, findNearestCodeGraphRoot, planFrontload } from '../directory';
			
 
				+import { getCodeGraphDir, isInitialized, unsafeIndexRootReason, findNearestCodeGraphRoot, planFrontload, hasStructuralKeyword, extractCodeTokens } from '../directory';
			
 
				 import { detectWorktreeIndexMismatch, worktreeMismatchWarning } from '../sync/worktree';
			
 
				 import { createShimmerProgress } from '../ui/shimmer-progress';
			
 
				 import { getGlyphs } from '../ui/glyphs';
			
@@ -1053,11 +1053,15 @@ program
 
				       try { input = JSON.parse(raw); } catch { return; }
			
 
				       const prompt = String(input.prompt || '');
			
 
				 
			
 
				-      // Gate: only structural / flow / impact / where-how prompts get context.
			
 
				-      // A cheap regex keeps every other prompt ("fix this typo") a zero-cost
			
 
				-      // no-op so we never add latency where there's no structural answer to give.
			
 
				-      const STRUCTURAL = /\b(how|where|trace|flow|path|reach(?:es|ed)?|call(?:s|ed|er|ers|ee)?|depend|impact|affect|wired?|connect|implement|architect|structure|breaks?|what calls|why does)\b/i;
			
 
				-      if (!prompt || !STRUCTURAL.test(prompt)) return;
			
 
				+      // Gate: only structural / flow / impact / where-how prompts get context, so
			
 
				+      // every other prompt ("fix this typo") stays a zero-cost no-op. Language-aware
			
 
				+      // (English + CJK keywords, plus code-shaped tokens) so it fires for non-English
			
 
				+      // prompts too (issue #994). A keyword fires on its own; a code-token is only a
			
 
				+      // CANDIDATE — verified against the graph below, so a tech brand ("JavaScript")
			
 
				+      // that looks like a symbol but isn't one here doesn't inject spurious context.
			
 
				+      const keyworded = hasStructuralKeyword(prompt);
			
 
				+      const codeTokens = keyworded ? [] : extractCodeTokens(prompt);
			
 
				+      if (!keyworded && codeTokens.length === 0) return;
			
 
				 
			
 
				       // Decide what to inject, shaped by WHERE the index(es) are: the nearest
			
 
				       // indexed ancestor of cwd, or — when cwd is an un-indexed workspace root
			
@@ -1079,6 +1083,12 @@ program
 
				         const { default: CodeGraph } = await loadCodeGraph();
			
 
				         const cg = await CodeGraph.open(plan.exploreRoot);
			
 
				         try {
			
 
				+          // Code-token-only prompt: require that at least one token is a REAL symbol
			
 
				+          // in THIS index before front-loading. Without it, a brand name or common
			
 
				+          // word that merely looks like code ("JavaScript", "GitHub") would run
			
 
				+          // explore and inject ~16KB of low-relevance context (issue #994 follow-up).
			
 
				+          // A keyword-bearing prompt skips this — the keyword is signal enough.
			
 
				+          if (!keyworded && !codeTokens.some((t) => cg.getNodesByName(t).length > 0)) return;
			
 
				           const { ToolHandler } = await import('../mcp/tools');
			
 
				           const handler = new ToolHandler(cg);
			
 
				           const result = await handler.execute('codegraph_explore', { query: prompt });
			
--- a/src/directory.ts
+++ b/src/directory.ts
@@ -233,6 +233,86 @@ export function findIndexedSubprojectRoots(
 
				   return out;
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * English structural keywords. The whole alternation is wrapped in `\b…\b`, so a
			
 
				+ * keyword embedded in an unrelated longer word doesn't false-positive ("flow" in
			
 
				+ * "flower"). The trailing `\b` also means a bare stem matches only that exact
			
 
				+ * word, so the inflected forms people actually type ("depends", "dependency",
			
 
				+ * "implementation", "architecture", "affected") are listed as explicit suffixes.
			
 
				+ */
			
 
				+const STRUCTURAL_EN = /\b(how|where|trac(?:e|es|ed|ing)|flows?|paths?|reach(?:es|ed|able)?|call(?:s|ed|er|ers|ee|ees|ing)?|depend(?:s|ed|ing|ent|ents|ency|encies)?|impact(?:s|ed|ing)?|affect(?:s|ed|ing)?|wir(?:e|ed|es|ing)|connect(?:s|ed|ing|ion|ions)?|implement(?:s|ed|ing|ation|ations)?|architect(?:s|ure|ures|ural)?|structur(?:e|es|ed|al)|break(?:s|ing)?|what calls|why does)\b/i;
			
 
				+
			
 
				+/**
			
 
				+ * Chinese structural keywords, matched as plain substrings — no `\b` wrapper.
			
 
				+ * JS's `\b` is defined over the ASCII word characters `[A-Za-z0-9_]` only; Han
			
 
				+ * characters count as non-word, so a keyword surrounded by other Han text,
			
 
				+ * whitespace or punctuation has no boundary on either side and `\b…\b` around it
			
 
				+ * would not match. That was issue #994: the English-only gate silently no-op'd
			
 
				+ * Chinese prompts, with no front-load nudge and no error to explain why.
			
 
				+ *
			
 
				+ * The list mirrors the English intent (如何/怎么=how, 在哪/哪里=where,
			
 
				+ * 追踪/跟踪=trace, 流程/流向=flow, 路径=path, 调用=call, 依赖=depend,
			
 
				+ * 影响=impact/affect, 实现=implement, 架构=architect, 结构=structure) plus
			
 
				+ * structural-overview words with no single clean English equivalent
			
 
				+ * (介绍/解析/分析/原理/机制).
			
 
				+ */
			
 
				+const STRUCTURAL_CJK = new RegExp(
			
 
				+  [
			
 
				+    '如何', '怎么', '在哪', '哪里', '追踪', '跟踪', '流程', '流向', '路径', '调用',
			
 
				+    '依赖', '影响', '实现', '架构', '结构', '介绍', '解析', '分析', '原理', '机制',
			
 
				+  ].join('|'),
			
 
				+);
			
 
				+
			
 
				+/** Trailing doc/data/asset file extension (case-insensitive). A `name.ext` that
			
 
				+ *  ends this way is a file reference rather than a code symbol, so the
			
 
				+ *  member-access signal must ignore it. */
			
 
				+const DOC_DATA_EXT = new RegExp(
			
 
				+  '\\.(md|markdown|txt|rst|json|ya?ml|toml|lock|csv|tsv|log|ini|cfg|conf|env|xml|html?|png|jpe?g|gif|svg|pdf)$',
			
 
				+  'i',
			
 
				+);
			
 
				+
			
 
				+/**
			
 
				+ * Reports whether `prompt` contains an explicit structural keyword, English or
			
 
				+ * CJK. A keyword is treated as a strong, self-contained signal: the front-load
			
 
				+ * hook fires on it directly, with no graph check. (A *code-token* match is
			
 
				+ * weaker — only a candidate the hook verifies against the graph first; see
			
 
				+ * {@link extractCodeTokens}.)
			
 
				+ *
			
 
				+ * @param prompt Raw prompt text; an empty string is never structural.
			
 
				+ * @returns `true` when either keyword pattern matches somewhere in `prompt`.
			
 
				+ */
			
 
				+export function hasStructuralKeyword(prompt: string): boolean {
			
 
				+  if (!prompt) return false;
			
 
				+  if (STRUCTURAL_EN.test(prompt)) return true;
			
 
				+  return STRUCTURAL_CJK.test(prompt);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Identifier-shaped tokens in `prompt` — camelCase / PascalCase-with-inner-cap,
			
 
				+ * snake_case, a `name(` call, or every segment of an `a.b.c` member-access
			
 
				+ * chain. Naming a symbol is a code question whatever the surrounding human
			
 
				+ * language, so these shapes catch prompts that carry no structural keyword at
			
 
				+ * all, such as "看看 <symbol> 这段逻辑" or "<symbol> 做了什么".
			
 
				+ *
			
 
				+ * These are *candidates*, not a verdict: a tech brand like `JavaScript` or
			
 
				+ * `GitHub` is identifier-shaped too, so the front-load hook checks each token
			
 
				+ * against the actual index ({@link getNodesByName}) and only fires when one is a
			
 
				+ * real symbol here — otherwise a brand-name prompt would inject ~16KB of
			
 
				+ * low-relevance context (issue #994 follow-up).
			
 
				+ *
			
 
				+ * A dotted name containing a doc/data extension ("README.md",
			
 
				+ * "tsconfig.build.json") is a file reference, not a symbol, so none of its
			
 
				+ * segments are taken from the member-access form.
			
 
				+ *
			
 
				+ * @param prompt Raw prompt text; an empty string yields no tokens.
			
 
				+ * @returns The de-duplicated candidate tokens.
			
 
				+ */
			
 
				+export function extractCodeTokens(prompt: string): string[] {
			
 
				+  if (!prompt) return [];
			
 
				+  const out = new Set<string>();
			
 
				+  // camelCase / PascalCase-with-inner-cap (getUserId, parseToken, UserService) or
			
 
				+  // snake_case (article_publish, get_user) — a whole identifier run that has an
			
 
				+  // inner lower→upper transition or an underscore flanked by alphanumerics.
			
 
				+  for (const m of prompt.matchAll(/[A-Za-z_$][\w$]*/g)) {
			
 
				+    const w = m[0];
			
 
				+    if (/[a-z][A-Z]/.test(w) || /[A-Za-z0-9]_[A-Za-z0-9]/.test(w)) out.add(w);
			
 
				+  }
			
 
				+  // call form: an identifier directly before '(' — parseToken(, render(). No
			
 
				+  // whitespace before '(' so prose like "the function (entry point)" doesn't trip it.
			
 
				+  for (const m of prompt.matchAll(/([A-Za-z_$][\w$]*)\(/g)) out.add(m[1]!);
			
 
				+  // member access (user.login, this.state.user). The WHOLE dotted chain is
			
 
				+  // consumed in one match: pairwise matching would drop the last segment of an
			
 
				+  // odd-length chain and would only test the final pair of a multi-dot filename,
			
 
				+  // leaking "tsconfig" / "build" out of "tsconfig.build.json".
			
 
				+  for (const m of prompt.matchAll(/[A-Za-z_$][\w$]*(?:\.[A-Za-z_$][\w$]*)+/g)) {
			
 
				+    const segments = m[0].split('.');
			
 
				+    // Any segment after the first that is a doc/data extension marks the chain
			
 
				+    // as a file reference, so the entire chain is skipped.
			
 
				+    if (segments.slice(1).some((s) => DOC_DATA_EXT.test(`.${s}`))) continue;
			
 
				+    for (const segment of segments) out.add(segment);
			
 
				+  }
			
 
				+  return [...out];
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Cheap, graph-free candidate gate for the front-load hook: is `prompt`
			
 
				+ * plausibly a structural / flow / impact / "where-how" question worth
			
 
				+ * front-loading context for?
			
 
				+ *
			
 
				+ * Two signals qualify. An explicit keyword (English or CJK, issue #994) is
			
 
				+ * sufficient on its own. An identifier-shaped token also qualifies, but only as
			
 
				+ * a candidate: the hook still verifies it against the graph, because a brand
			
 
				+ * name like `JavaScript` is token-shaped without being a symbol. Any other
			
 
				+ * prompt ("fix this typo", in any language) stays a zero-cost no-op.
			
 
				+ *
			
 
				+ * @param prompt Raw prompt text.
			
 
				+ * @returns `true` when the prompt has a keyword or at least one code token.
			
 
				+ */
			
 
				+export function isStructuralPrompt(prompt: string): boolean {
			
 
				+  if (hasStructuralKeyword(prompt)) return true;
			
 
				+  const candidates = extractCodeTokens(prompt);
			
 
				+  return candidates.length > 0;
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * What the front-load hook should do for a prompt issued from a directory.
			
 
				  */