10 Commits e43ac82cdf ... ba209d9489

Tác giả SHA1 Thông báo Ngày
  Colby Mchenry ba209d9489 feat(c/c++): resolve function-pointer dispatch (#932) (#954) 19 giờ trước cách đây
  Colby Mchenry 826810f128 feat(java): index Lombok-generated members so call chains resolve (#912) (#953) 20 giờ trước cách đây
  Colby Mchenry 3e1547bbe1 fix(mcp): bold labels instead of ATX headings in tool results (#778) (#951) 20 giờ trước cách đây
  Colby Mchenry ace8d8a0d0 fix(mcp): stop the first tool call hanging on a huge-repo catch-up reconcile (#905) (#950) 21 giờ trước cách đây
  Colby Mchenry 2010c2d2b5 fix(sync): apply the ignore matcher to the git change-detection fast path (#766) (#949) 21 giờ trước cách đây
  Colby Mchenry 149b4e11c7 Enhance README with CodeGraph benefits and image 22 giờ trước cách đây
  Colby McHenry 2f3188eb49 docs: reframe value prop around precision/speed, update Node floor to 20, and expand language/framework coverage 23 giờ trước cách đây
  Colby Mchenry f63e5db2cc fix(extraction): drop the phantom C++ function from a macro-annotated class misparse (#946) (#948) 1 ngày trước cách đây
  Colby Mchenry 2bdc169ce4 fix(extraction): skip submodule worktrees instead of indexing them as duplicates (#945) (#947) 1 ngày trước cách đây
  Colby Mchenry 03666584ed fix(extraction): drop the `./` self-entry from `git ls-files --directory` (#936) (#941) 1 ngày trước cách đây
47 tập tin đã thay đổi với 1669 bổ sung và 192 xóa
  1. 8 0
      CHANGELOG.md
  2. 1 1
      CLAUDE.md
  3. 22 16
      README.md
  4. 7 6
      __tests__/adaptive-explore-sizing.test.ts
  5. 147 0
      __tests__/c-fnptr-synthesizer.test.ts
  6. 12 12
      __tests__/dynamic-boundaries.test.ts
  7. 1 1
      __tests__/explore-blast-radius.test.ts
  8. 3 2
      __tests__/explore-corroboration-ranking.test.ts
  9. 2 2
      __tests__/explore-output-budget.test.ts
  10. 2 2
      __tests__/explore-synth-constant-endpoints.test.ts
  11. 57 0
      __tests__/extraction.test.ts
  12. 156 0
      __tests__/lombok.test.ts
  13. 51 0
      __tests__/mcp-catchup-gate.test.ts
  14. 2 2
      __tests__/mcp-staleness-banner.test.ts
  15. 57 0
      __tests__/multi-repo-workspace.test.ts
  16. 1 1
      __tests__/node-file-view.test.ts
  17. 4 4
      __tests__/offload.test.ts
  18. 4 1
      __tests__/same-name-disambiguation.test.ts
  19. 125 0
      __tests__/sync.test.ts
  20. 1 0
      docs/design/dispatch-synthesizer-backlog.md
  21. 1 1
      package.json
  22. 1 1
      scripts/agent-eval/offload-eval-metrics.mjs
  23. 1 1
      scripts/agent-eval/probe-explore.mjs
  24. 1 1
      site/src/content/docs/core-concepts/how-it-works.md
  25. 1 1
      site/src/content/docs/core-concepts/resolution.md
  26. 5 4
      site/src/content/docs/getting-started/installation.md
  27. 7 8
      site/src/content/docs/getting-started/introduction.md
  28. 11 8
      site/src/content/docs/getting-started/quickstart.md
  29. 13 6
      site/src/content/docs/getting-started/your-first-graph.md
  30. 10 7
      site/src/content/docs/guides/framework-routes.md
  31. 3 3
      site/src/content/docs/guides/indexing.md
  32. 24 0
      site/src/content/docs/reference/api.md
  33. 22 8
      site/src/content/docs/reference/cli.md
  34. 5 9
      site/src/content/docs/reference/integrations.md
  35. 3 0
      site/src/content/docs/reference/languages.md
  36. 23 8
      site/src/content/docs/reference/mcp-server.md
  37. 6 2
      site/src/content/docs/troubleshooting.md
  38. 75 10
      src/extraction/index.ts
  39. 39 2
      src/extraction/languages/c-cpp.ts
  40. 225 10
      src/extraction/languages/java.ts
  41. 14 0
      src/extraction/tree-sitter-types.ts
  42. 8 0
      src/extraction/tree-sitter.ts
  43. 132 44
      src/mcp/tools.ts
  44. 9 7
      src/reasoning/reasoner.ts
  45. 359 0
      src/resolution/c-fnptr-synthesizer.ts
  46. 3 0
      src/resolution/callback-synthesizer.ts
  47. 5 1
      src/resolution/strip-comments.ts

+ 8 - 0
CHANGELOG.md

@@ -21,6 +21,8 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - `codegraph_explore` now follows **MediatR** request and notification dispatch in C#/.NET. A `_mediator.Send(command)` or `_mediator.Publish(notification)` call now links to the `Handle` method of the matching `IRequestHandler<>` / `INotificationHandler<>` — usually in a different file in a Clean Architecture layout — so "what handles this command?" traces from the controller straight into the handler instead of stopping at the mediator call. The sent type is recognized whether it's constructed inline (`Send(new GetFooQuery())`), built into a local first (`var cmd = new …; Send(cmd)`), or passed in as a parameter, and it's matched by type — so a `MessagingCenter.Send(...)` or a same-named DTO that isn't a request is never mislinked, and a project without MediatR is unaffected.
 - `codegraph_explore` now follows **MediatR** request and notification dispatch in C#/.NET. A `_mediator.Send(command)` or `_mediator.Publish(notification)` call now links to the `Handle` method of the matching `IRequestHandler<>` / `INotificationHandler<>` — usually in a different file in a Clean Architecture layout — so "what handles this command?" traces from the controller straight into the handler instead of stopping at the mediator call. The sent type is recognized whether it's constructed inline (`Send(new GetFooQuery())`), built into a local first (`var cmd = new …; Send(cmd)`), or passed in as a parameter, and it's matched by type — so a `MessagingCenter.Send(...)` or a same-named DTO that isn't a request is never mislinked, and a project without MediatR is unaffected.
 - `codegraph_explore` now follows **Sidekiq** background-job dispatch in Ruby. A `DestroyUserWorker.perform_async(id)` (or `.perform_in` / `.perform_at`) call now links to that worker's `perform` method — usually in `app/workers/` away from the controller or model that enqueues it — so "what runs in the background here?" traces from the enqueue straight into the job body. Both the modern `include Sidekiq::Job` and the older `Sidekiq::Worker` are recognized, namespaced workers resolve to the right class even when several share a name (e.g. `Comments::NotifyWorker` vs `Articles::NotifyWorker`), and Rails ActiveJob's `perform_later` — a different mechanism — is intentionally left alone.
 - `codegraph_explore` now follows **Sidekiq** background-job dispatch in Ruby. A `DestroyUserWorker.perform_async(id)` (or `.perform_in` / `.perform_at`) call now links to that worker's `perform` method — usually in `app/workers/` away from the controller or model that enqueues it — so "what runs in the background here?" traces from the enqueue straight into the job body. Both the modern `include Sidekiq::Job` and the older `Sidekiq::Worker` are recognized, namespaced workers resolve to the right class even when several share a name (e.g. `Comments::NotifyWorker` vs `Articles::NotifyWorker`), and Rails ActiveJob's `perform_later` — a different mechanism — is intentionally left alone.
 - `codegraph_explore` now follows **Laravel events** in PHP. An `event(new OrderShipped($order))` call now links to every listener that handles it — each listener's `handle()` method, usually a separate `app/Listeners/` class — so "what reacts to this event?" traces from the dispatch straight into the listener bodies. Listeners are found both ways Laravel registers them: by a typed `handle(OrderShipped $event)` (auto-discovery, including a `handle(A|B $event)` union that listens for two events) and by the `protected $listen` map in your `EventServiceProvider` (which also catches a listener whose `handle()` has no type-hint). One event fans out to all its listeners, and queued jobs — dispatched via `::dispatch()` rather than `event()` — are correctly left out.
 - `codegraph_explore` now follows **Laravel events** in PHP. An `event(new OrderShipped($order))` call now links to every listener that handles it — each listener's `handle()` method, usually a separate `app/Listeners/` class — so "what reacts to this event?" traces from the dispatch straight into the listener bodies. Listeners are found both ways Laravel registers them: by a typed `handle(OrderShipped $event)` (auto-discovery, including a `handle(A|B $event)` union that listens for two events) and by the `protected $listen` map in your `EventServiceProvider` (which also catches a listener whose `handle()` has no type-hint). One event fans out to all its listeners, and queued jobs — dispatched via `::dispatch()` rather than `event()` — are correctly left out.
+- CodeGraph now understands **Lombok**-generated methods in Java. `@Getter`, `@Setter`, `@Data`, `@Value`, and `@Builder` generate getters, setters, `builder()`, `equals`/`hashCode`/`toString`, and the `@Slf4j` `log` field at compile time, so those methods never appear in the source — and a `user.getName()`, `User.builder()`, or `log.info(...)` call used to resolve to nothing, silently breaking call-chain analysis (the agent would conclude the method didn't exist and reconstruct it by hand). Those members are now indexed from the annotations and fields, so they appear in `codegraph search` and `codegraph_explore`/`codegraph_node`, and callers trace through them like any hand-written method. They're marked as Lombok-generated so they read as generated, not hand-written; a method you write yourself is never overridden, static fields get no accessor, and a class without Lombok is unaffected. Thanks @git87663849. (#912)
+- `codegraph_explore` now follows **C and C++ function-pointer dispatch**. C does polymorphism with function pointers: a struct carries a function-pointer field, concrete functions are registered into it through a table (`static struct cmd commands[] = {{"add", cmd_add}, …}`), a designated initializer (`.handler = on_open`), or an assignment, and the code dispatches indirectly (`p->fn(argv)`). None of that was visible to analysis — the indirect call resolved to nothing, so `git`'s command runner looked like it called nothing and a vtable's implementations had no callers. CodeGraph now links the dispatch site to the registered handlers, keyed by the struct field, so "what runs when this dispatches?" traces from `p->fn(...)` into every function registered for that field. This covers the command-table idiom (git, redis) and the ops-struct/vtable idiom (curl's content-encoders, protocol handlers), including the case where a generic hook slot is reassigned from a registry (`h->func = found->fn`). It stays precise — distinct function-pointer fields don't cross-link, a plain data field is never treated as a dispatch, and a project without function-pointer dispatch is unaffected. (#932)
 
 
 - `codegraph_explore` now surfaces the right code in large multi-layer projects. When you ask a backend-flow question in a repo that pairs an API server with a big frontend that mirrors the same domain words — say an `app/` admin UI sitting over an `api/` server — the server-side file that genuinely matches several of your query's terms is no longer pushed out of the results by the larger, more interconnected frontend layer. A file corroborated by two or more distinct query terms is now kept in the answer even when a denser unrelated layer would otherwise crowd it out, so "how does X read items / handle the request" returns the service or handler that does the work instead of a wall of frontend views. Single-layer projects are unaffected; set `CODEGRAPH_RANK_NO_MULTITERM=1` to revert to the previous ranking.
 - `codegraph_explore` now surfaces the right code in large multi-layer projects. When you ask a backend-flow question in a repo that pairs an API server with a big frontend that mirrors the same domain words — say an `app/` admin UI sitting over an `api/` server — the server-side file that genuinely matches several of your query's terms is no longer pushed out of the results by the larger, more interconnected frontend layer. A file corroborated by two or more distinct query terms is now kept in the answer even when a denser unrelated layer would otherwise crowd it out, so "how does X read items / handle the request" returns the service or handler that does the work instead of a wall of frontend views. Single-layer projects are unaffected; set `CODEGRAPH_RANK_NO_MULTITERM=1` to revert to the previous ranking.
 - Impact and blast-radius analysis for TypeScript, JavaScript, Go, Python, Rust, Ruby, C, Java, C#, PHP, Scala, Kotlin, Swift, Dart, and Pascal/Delphi now understands the readers of a constant. When you change a file-scope, package-level, module-level, or class-level constant — a config object, a lookup table, a shared constant — the other symbols in that file that read it now show up as affected, where before they were invisible (impact only followed calls, imports, and inheritance, so a constant's consumers looked like "nothing depends on this"). This makes `codegraph impact`, and the impact trail in `codegraph_explore`/`codegraph_node`, catch the "change this table, break its readers" class of change. It's on by default and adds no nodes to your graph; bundled/minified files and ambiguously-shadowed names are skipped to keep results precise. Set `CODEGRAPH_VALUE_REFS=0` to turn it off.
 - Impact and blast-radius analysis for TypeScript, JavaScript, Go, Python, Rust, Ruby, C, Java, C#, PHP, Scala, Kotlin, Swift, Dart, and Pascal/Delphi now understands the readers of a constant. When you change a file-scope, package-level, module-level, or class-level constant — a config object, a lookup table, a shared constant — the other symbols in that file that read it now show up as affected, where before they were invisible (impact only followed calls, imports, and inheritance, so a constant's consumers looked like "nothing depends on this"). This makes `codegraph impact`, and the impact trail in `codegraph_explore`/`codegraph_node`, catch the "change this table, break its readers" class of change. It's on by default and adds no nodes to your graph; bundled/minified files and ambiguously-shadowed names are skipped to keep results precise. Set `CODEGRAPH_VALUE_REFS=0` to turn it off.
@@ -31,6 +33,8 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
 ### Fixes
 ### Fixes
 
 
+- MCP tool results no longer show up as oversized headings in Markdown-rendering clients (such as the Claude Code VSCode extension). Results used Markdown headings (`##`/`###`) for things like the status summary, each search hit, and every file section in an exploration, so a normal query filled the transcript with large-font lines — worst with `codegraph_search` and `codegraph_explore`, where the noise grew with the number of results. Section headers are now bold labels, which render at normal text size while keeping the same structure. Terminal/CLI output is unchanged. (#778)
+- An MCP server pointed at a very large repository (tens of thousands of files) no longer hangs on the first tool call after a fresh start. On startup CodeGraph reconciles its index against the current files on disk, and on a huge repo that reconcile could run for minutes while blocking the very first request — long enough that the background server was sometimes force-restarted mid-scan, so the first query never came back at all. The reconcile now yields as it runs (keeping the server responsive instead of pinning it), and the first tool call waits only briefly for it before answering and letting the rest finish in the background — so you get a fast first response and the index still catches up. Set `CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS` to tune how long that first call waits (default 3000ms), or `=0` to always wait for the full reconcile. (#905)
 - `codegraph install` now wires up your agents and stops there — it no longer indexes the current directory. Building a project's graph is always the explicit `codegraph init` (or `codegraph index`), so you decide what gets indexed and when, and the steps are the same whether you installed globally or just for one project. This clears up the confusion where a project-local install silently indexed but a global one didn't, and where the docs and the tool disagreed about whether you still had to run `init`. (#826)
 - `codegraph install` now wires up your agents and stops there — it no longer indexes the current directory. Building a project's graph is always the explicit `codegraph init` (or `codegraph index`), so you decide what gets indexed and when, and the steps are the same whether you installed globally or just for one project. This clears up the confusion where a project-local install silently indexed but a global one didn't, and where the docs and the tool disagreed about whether you still had to run `init`. (#826)
 - React components declared with `forwardRef`, `memo`, or styled-components / emotion (`const Button = forwardRef(...)`, `const Card = memo(...)`, `const Box = styled.button\`…\``) are now recognized as components, so finding where they're used works. Before, they were indexed as plain constants, so `codegraph callers` and impact analysis reported "no callers found" even when the component was rendered across dozens of files — a dangerous false "safe to change" right before refactoring a shared component. Now every `<Button/>` usage links back to the component, so callers and blast radius are complete. This is the standard shadcn/ui declaration style, so for typical React design systems the whole UI layer is no longer invisible to impact analysis. Thanks @Arlandaren for the report and @maxmilian for the root-cause. (#841)
 - React components declared with `forwardRef`, `memo`, or styled-components / emotion (`const Button = forwardRef(...)`, `const Card = memo(...)`, `const Box = styled.button\`…\``) are now recognized as components, so finding where they're used works. Before, they were indexed as plain constants, so `codegraph callers` and impact analysis reported "no callers found" even when the component was rendered across dozens of files — a dangerous false "safe to change" right before refactoring a shared component. Now every `<Button/>` usage links back to the component, so callers and blast radius are complete. This is the standard shadcn/ui declaration style, so for typical React design systems the whole UI layer is no longer invisible to impact analysis. Thanks @Arlandaren for the report and @maxmilian for the root-cause. (#841)
 - React Router and Next.js routes defined in `.tsx` / `.jsx` files are now indexed. Routes written as JSX — `<Route path="/users" element={<UsersPage/>}/>`, `createBrowserRouter([...])`, and Next.js `app/`/`pages/` page files — were being skipped entirely (only routes that happened to live in plain `.ts`/`.js` were picked up), so "what renders at this path?" and the route → page-component link were missing for most React apps. Now those routes show up in `codegraph search`/`codegraph_explore` and connect to the component they render, just like the backend route → handler links on other frameworks.
 - React Router and Next.js routes defined in `.tsx` / `.jsx` files are now indexed. Routes written as JSX — `<Route path="/users" element={<UsersPage/>}/>`, `createBrowserRouter([...])`, and Next.js `app/`/`pages/` page files — were being skipped entirely (only routes that happened to live in plain `.ts`/`.js` were picked up), so "what renders at this path?" and the route → page-component link were missing for most React apps. Now those routes show up in `codegraph search`/`codegraph_explore` and connect to the component they render, just like the backend route → handler links on other frameworks.
@@ -40,6 +44,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - On Linux, hitting the kernel's inotify watch limit on a large project no longer silently leaves half the tree unwatched. CodeGraph now tells you once — naming the exact setting to raise (`fs.inotify.max_user_watches`, e.g. `sudo sysctl fs.inotify.max_user_watches=1048576`) — and keeps live-watching the directories it could register while `codegraph sync` (or the git sync hooks) covers the rest. (#876)
 - On Linux, hitting the kernel's inotify watch limit on a large project no longer silently leaves half the tree unwatched. CodeGraph now tells you once — naming the exact setting to raise (`fs.inotify.max_user_watches`, e.g. `sudo sysctl fs.inotify.max_user_watches=1048576`) — and keeps live-watching the directories it could register while `codegraph sync` (or the git sync hooks) covers the rest. (#876)
 - A long-running MCP server now notices when a git worktree gains its own index. Before, if the server (or shared daemon) first saw a worktree before you ran `codegraph init` in it — so the lookup walked up to the main checkout's index — it pinned that decision for its whole life: even after the worktree had its own `.codegraph/`, every query kept hitting the main checkout's index and every result carried a false "this index belongs to a different git working tree" warning, until you restarted the server. The CLI got it right but the MCP server didn't, and re-indexing didn't help. The server now re-checks which index a path belongs to on each call, so the worktree's own index is picked up — and the stale warning drops — without a restart. (#926)
 - A long-running MCP server now notices when a git worktree gains its own index. Before, if the server (or shared daemon) first saw a worktree before you ran `codegraph init` in it — so the lookup walked up to the main checkout's index — it pinned that decision for its whole life: even after the worktree had its own `.codegraph/`, every query kept hitting the main checkout's index and every result carried a false "this index belongs to a different git working tree" warning, until you restarted the server. The CLI got it right but the MCP server didn't, and re-indexing didn't help. The server now re-checks which index a path belongs to on each call, so the worktree's own index is picked up — and the stale warning drops — without a restart. (#926)
 - A long-running MCP server now recovers when your index is deleted and rebuilt at the same path. If `.codegraph/` was removed and recreated while the server held it open — most easily by recreating a git worktree at the same path, or `rm`-ing `.codegraph/` and running `codegraph init` again — the server kept reading the old, deleted database file and served a frozen snapshot: renamed or removed symbols still showed as live, new ones were missing, and `codegraph sync` couldn't refresh it — only restarting the server fixed it. The server now detects that the database file was swapped out from under it and reopens the live one in place, so results stay correct without a restart. (On Linux and macOS; Windows doesn't allow deleting an open file, so it isn't affected.) (#925)
 - A long-running MCP server now recovers when your index is deleted and rebuilt at the same path. If `.codegraph/` was removed and recreated while the server held it open — most easily by recreating a git worktree at the same path, or `rm`-ing `.codegraph/` and running `codegraph init` again — the server kept reading the old, deleted database file and served a frozen snapshot: renamed or removed symbols still showed as live, new ones were missing, and `codegraph sync` couldn't refresh it — only restarting the server fixed it. The server now detects that the database file was swapped out from under it and reopens the live one in place, so results stay correct without a restart. (On Linux and macOS; Windows doesn't allow deleting an open file, so it isn't affected.) (#925)
+- The MCP server now opens and auto-syncs a project that lives inside a folder an enclosing git repository ignores. Before, if the directory you indexed sat within a larger repo that gitignored it, the shared MCP daemon failed to open the project — its log repeated ``Failed to open project … path should be a `path.relative()`d string, but got "./"`` — so the file watcher never started and the index silently went stale until you ran `codegraph sync` by hand (setting `CODEGRAPH_NO_DAEMON=1` was the only workaround). The daemon now opens the project and starts watching as expected. Most visible with Codex on Windows, but the cause wasn't platform-specific. (#936)
+- A git worktree of a submodule is no longer indexed as a duplicate copy of that submodule's code. CodeGraph already skips ordinary worktrees (a second working view of a repo it indexes), but a worktree created *from a submodule* — common in monorepos that check submodules out into worktrees for parallel feature work — was mistaken for a genuine embedded repo and swept in, duplicating every symbol it shared with the real submodule checkout (one report had ~28% of its index as duplicates, inflating both query results and the database). These submodule worktrees are now recognized and skipped, while the submodule's own checkout stays indexed as distinct code. Thanks @charlesxu2026-ship-it. (#945)
+- A C++ class or struct annotated with an export/visibility macro — `class MYLIB_EXPORT Foo : public Bar { … }`, the common DLL-export / visibility pattern in headers — is no longer mis-indexed as a function spanning the whole class body. Not knowing the macro is a macro, the parser read it as a type and the whole declaration as a function, so the class surfaced as a phantom `function` that showed up as a false caller in `codegraph callers`, `codegraph impact`, and blast-radius analysis, and skewed symbol counts. CodeGraph now recognizes this misparse and drops the bogus node. Thanks @spwlyzx. (#946)
+- `codegraph status` no longer reports phantom pending changes for files CodeGraph deliberately keeps out of the index — a tracked file inside a committed dependency dir (a checked-in `vendor/` or `node_modules/`), or a tracked file under a `.gitignore`d directory. A full index correctly excludes these, and `codegraph sync` never indexes them, but the fast change-detector behind `codegraph status` flagged every edit to such a file as "modified" (and a new one as "added") — so the pending-changes count never cleared no matter how many times you synced. Change detection now applies the same ignore rules the full index does, so `status` agrees with `sync`, and tools built on CodeGraph's change-detection API get the same corrected list. (#766)
 
 
 
 
 ## [1.0.1] - 2026-06-13
 ## [1.0.1] - 2026-06-13

+ 1 - 1
CLAUDE.md

@@ -29,7 +29,7 @@ npx vitest run __tests__/extraction.test.ts -t "TypeScript"
 
 
 `copy-assets` (called from `build`) copies `src/db/schema.sql` and all `src/extraction/wasm/*.wasm` files into `dist/`. **Any new SQL or grammar wasm must be copied or it won't ship.**
 `copy-assets` (called from `build`) copies `src/db/schema.sql` and all `src/extraction/wasm/*.wasm` files into `dist/`. **Any new SQL or grammar wasm must be copied or it won't ship.**
 
 
-Node engines: `>=18.0.0 <25.0.0`. There is a hard exit on Node 25.x (see `src/bin/node-version-check.ts`).
+Node engines: `>=20.0.0 <25.0.0`. There is a hard exit on Node 25.x and on any version below 20 (see `src/bin/node-version-check.ts`).
 
 
 ## Architecture
 ## Architecture
 
 

+ 22 - 16
README.md

@@ -10,7 +10,7 @@ Follow [@getcodegraph](https://x.com/getcodegraph) on X for updates.
 
 
 ### Supercharge Claude Code, Cursor, Codex, OpenCode, Hermes Agent, Gemini, Antigravity, and Kiro with Semantic Code Intelligence
 ### Supercharge Claude Code, Cursor, Codex, OpenCode, Hermes Agent, Gemini, Antigravity, and Kiro with Semantic Code Intelligence
 
 
-**~16% cheaper · ~58% fewer tool calls · 100% local**
+**Surgical context · fewer tool calls · faster answers · 100% local**
 
 
 ### [Documentation & Website →](https://colbymchenry.github.io/codegraph/)
 ### [Documentation & Website →](https://colbymchenry.github.io/codegraph/)
 
 
@@ -111,27 +111,33 @@ codegraph uninstall
 
 
 ## Why CodeGraph?
 ## Why CodeGraph?
 
 
-When Claude Code explores a codebase, it spawns **Explore agents** that scan files with grep, glob, and Read — consuming tokens on every tool call.
+When an AI agent needs to understand code — to answer a question or make a change — it discovers structure the slow way: grep, glob, and Read, one file at a time, rebuilding call paths and dependencies by hand. That's a pile of tool calls and round-trips before it even starts the real work.
 
 
-**CodeGraph gives those agents a pre-indexed knowledge graph** — symbol relationships, call graphs, and code structure. Agents query the graph instantly instead of scanning files.
+**CodeGraph hands the agent the exact code it needs in one call.** It's a pre-built knowledge graph of every symbol, call edge, and dependency in your codebase — so instead of crawling files, the agent asks one question and gets back the relevant source, the call paths between those symbols (including dynamic-dispatch hops grep can't follow), and the blast radius of a change. **Surgical context, not a file-by-file search** — which means fewer tool calls and faster answers on every codebase, large or small.
+
+<img width="1536" height="1024" alt="token-cost-savings-scale" src="https://github.com/user-attachments/assets/eb74a11a-a3ab-4b01-80a6-19f78352ae8e" />
+
+> **A note on cost:** CodeGraph's win on *every* codebase is precision and speed — fewer tool calls, faster answers. It cuts token and dollar cost too, but those savings are **scale-dependent**: small and noisy on a modest codebase, and material only once a repo is large and tangled enough — at the scale of a Google or Microsoft monorepo, multiplied by a whole team's daily agent usage — for them to compound into a real line item. On a 500-file project, adopt CodeGraph for the speed; the cost savings show up when the codebase (and the team) gets big.
 
 
 ### Benchmark Results
 ### Benchmark Results
 
 
-Tested across **7 real-world open-source codebases** spanning 7 languages, comparing an agent (Claude Code, headless) answering one architecture question **with** and **without** CodeGraph. Each cell is the savings at the **median of 4 runs per arm**. _Re-validated on Opus 4.8 (2026-06-02), on the current build (`codegraph_explore` as the primary tool)._
+Tested across **7 real-world open-source codebases** spanning 7 languages, comparing an agent (Claude Code, headless) answering one architecture question **with** and **without** CodeGraph, at the **median of 4 runs per arm**. _Re-validated on Opus 4.8 (2026-06-02), on the current build (`codegraph_explore` as the primary tool)._
+
+> **The universal win — every repo, every size: 58% fewer tool calls · 22% faster · file reads cut to ~zero.**
 
 
-> **Average: 16% cheaper · 47% fewer tokens · 22% faster · 58% fewer tool calls**
+The reliable, universal payoff is **surgical context and speed**: CodeGraph collapses the agent's grep/find/Read crawl into a few direct queries — returning the exact methods you asked about even when they're buried in a multi-thousand-line file — so it answers with **near-zero file reads** while the no-CodeGraph agent spends its budget on discovery. The **Tokens** and **Cost** columns are real too, but — as noted above — they're **scale-dependent**: small and noisy per query, compounding into real money only at large-codebase, high-volume scale.
 
 
-| Codebase | Language | Cost | Tokens | Time | Tool calls |
-|----------|----------|------|--------|------|------------|
-| **VS Code** | TypeScript · ~10k files | 18% cheaper | 64% fewer | 11% faster | 81% fewer |
-| **Excalidraw** | TypeScript · ~640 | even | 25% fewer | 27% faster | 40% fewer |
-| **Django** | Python · ~3k | 8% cheaper | 60% fewer | 13% faster | 77% fewer |
-| **Tokio** | Rust · ~790 | even | 38% fewer | 18% faster | 57% fewer |
-| **OkHttp** | Java · ~645 | 25% cheaper | 54% fewer | 31% faster | 50% fewer |
-| **Gin** | Go · ~110 | 19% cheaper | 23% fewer | 24% faster | 44% fewer |
-| **Alamofire** | Swift · ~110 | 40% cheaper | 64% fewer | 33% faster | 58% fewer |
+| Codebase | Language | Tool calls | Time | File reads | Tokens | Cost |
+|----------|----------|------------|------|------------|--------|------|
+| **VS Code** | TypeScript · ~10k files | 81% fewer | 11% faster | 0 vs 9 | 64% fewer | 18% cheaper |
+| **Excalidraw** | TypeScript · ~640 | 40% fewer | 27% faster | 0 vs 7 | 25% fewer | even |
+| **Django** | Python · ~3k | 77% fewer | 13% faster | 0 vs 9 | 60% fewer | 8% cheaper |
+| **Tokio** | Rust · ~790 | 57% fewer | 18% faster | 0 vs 8 | 38% fewer | even |
+| **OkHttp** | Java · ~645 | 50% fewer | 31% faster | 0 vs 4 | 54% fewer | 25% cheaper |
+| **Gin** | Go · ~110 | 44% fewer | 24% faster | 1 vs 6 | 23% fewer | 19% cheaper |
+| **Alamofire** | Swift · ~110 | 58% fewer | 33% faster | 0 vs 9 | 64% fewer | 40% cheaper |
 
 
-CodeGraph cuts **tokens, tool calls, and wall-clock time on every repo** — across small, medium, and large codebases — and answers them with **near-zero file reads**, while the no-CodeGraph agent spends its budget on grep/find/Read discovery. `codegraph_explore` shows the answer in full — the mechanism plus the exact methods you asked about, even when they're buried in a multi-thousand-line file — while collapsing redundant interchangeable implementations to signatures, so the response is sized to the *answer* rather than the file count. **Cost stays flat-to-cheaper everywhere** — largest on the small repos (Alamofire, OkHttp), roughly break-even on the most response-heavy ones (Excalidraw, Tokio), where CodeGraph trades the no-CodeGraph agent's many small grep/read round-trips for a few large, cache-heavy tool responses.
+<sub>**File reads** = median files the agent opened **with** vs **without** CodeGraph — the surgical-context win in one column. **Tokens** and **Cost** are the same with-vs-without deltas; they're directional (they move run-to-run) and, per query, small in absolute terms — which is why they only become a line item at scale. `codegraph_explore` also collapses redundant interchangeable implementations to signatures, so a response is sized to the *answer* rather than the file count.</sub>
 
 
 <details>
 <details>
 <summary><strong>Per-repo breakdown — WITH vs WITHOUT (median of 4)</strong></summary>
 <summary><strong>Per-repo breakdown — WITH vs WITHOUT (median of 4)</strong></summary>
@@ -234,7 +240,7 @@ CodeGraph cuts **tokens, tool calls, and wall-clock time on every repo** — acr
 
 
 | | |
 | | |
 |---|---|
 |---|---|
-| **Smart Context Building** | One tool call returns entry points, related symbols, and code snippets — no expensive exploration agents |
+| **Surgical Context** | One tool call returns entry points, related symbols, and code snippets — no slow file-by-file exploration |
 | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 |
 | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 |
 | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes |
 | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes |
 | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config |
 | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config |

+ 7 - 6
__tests__/adaptive-explore-sizing.test.ts

@@ -35,15 +35,16 @@ import CodeGraph from '../src/index';
 // (the steer-to-explore phrasing changed when the Read invitation was removed).
 // (the steer-to-explore phrasing changed when the Read invitation was removed).
 const SKELETON_MARK = '· skeleton (signatures only';
 const SKELETON_MARK = '· skeleton (signatures only';
 
 
-/** Return the `#### <path> ...` section for a file basename, header through the
- *  line before the next `###`/`####` header (or end of output). */
+/** Return the ``**`<path>`** ...`` section for a file basename, header through the
+ *  line before the next line that starts with `**` (or end of output). Headers are
+ *  bold labels, not ATX headings (issue #778); file sections start with ``**` ``. */
 function sectionFor(text: string, basename: string): string {
 function sectionFor(text: string, basename: string): string {
   const lines = text.split('\n');
   const lines = text.split('\n');
-  const start = lines.findIndex((l) => l.startsWith('#### ') && l.includes(basename));
+  const start = lines.findIndex((l) => l.startsWith('**`') && l.includes(basename));
   if (start < 0) return '';
   if (start < 0) return '';
   let end = lines.length;
   let end = lines.length;
   for (let i = start + 1; i < lines.length; i++) {
   for (let i = start + 1; i < lines.length; i++) {
-    if (lines[i].startsWith('### ') || lines[i].startsWith('#### ')) {
+    if (lines[i].startsWith('**')) {
       end = i;
       end = i;
       break;
       break;
     }
     }
@@ -284,7 +285,7 @@ export class YamlCodec extends Codec {
     const text = result.content?.[0]?.text ?? '';
     const text = result.content?.[0]?.text ?? '';
 
 
     // Precondition: the spine must have formed, or nothing skeletonizes.
     // Precondition: the spine must have formed, or nothing skeletonizes.
-    expect(text).toContain('## Flow (call path among the symbols you queried)');
+    expect(text).toContain('**Flow (call path among the symbols you queried)');
 
 
     for (const [file, marker] of [
     for (const [file, marker] of [
       ['bridge-interceptor.ts', 'BRIDGE_BODY_MARKER'],
       ['bridge-interceptor.ts', 'BRIDGE_BODY_MARKER'],
@@ -345,7 +346,7 @@ export class YamlCodec extends Codec {
   it('spares an off-spine sibling when the agent NAMED a callable in it (RealCall fix)', async () => {
   it('spares an off-spine sibling when the agent NAMED a callable in it (RealCall fix)', async () => {
     const result = await handler.execute('codegraph_explore', { query: SPARE_QUERY, maxFiles: 15 });
     const result = await handler.execute('codegraph_explore', { query: SPARE_QUERY, maxFiles: 15 });
     const text = result.content?.[0]?.text ?? '';
     const text = result.content?.[0]?.text ?? '';
-    expect(text).toContain('## Flow (call path among the symbols you queried)');
+    expect(text).toContain('**Flow (call path among the symbols you queried)');
 
 
     // auth-interceptor.ts is an off-spine Interceptor sibling — would skeletonize —
     // auth-interceptor.ts is an off-spine Interceptor sibling — would skeletonize —
     // but the agent named its method `authenticate`, so it stays FULL.
     // but the agent named its method `authenticate`, so it stays FULL.

+ 147 - 0
__tests__/c-fnptr-synthesizer.test.ts

@@ -0,0 +1,147 @@
+/**
+ * C/C++ function-pointer dispatch synthesis (#932).
+ *
+ * C polymorphism is the function pointer: a struct fn-pointer field, registered
+ * to concrete functions in a table (positional `{"add", cmd_add}` or designated
+ * `.fn = cmd_add`) or by assignment, then dispatched indirectly (`p->fn(argv)`).
+ * Static extraction sees neither the registration→field binding nor the
+ * indirect call, so the dispatcher→handler edge is missing. These tests prove
+ * the bridge keyed by (struct type, fn-pointer field): the command-table shape,
+ * designated init, the typedef'd-field + field←field double-hop (the issue's
+ * own hook_demo.c shape), by-value dispatch, and the precision boundaries
+ * (a data field is never bridged, distinct fn-pointer fields don't cross-bleed,
+ * and a non-C project is a no-op).
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import * as os from 'node:os';
+import { CodeGraph } from '../src';
+
+describe('c-fnptr dispatch synthesizer', () => {
+  let dir: string;
+  beforeEach(() => { dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cfp-')); });
+  afterEach(() => { fs.rmSync(dir, { recursive: true, force: true }); });
+
+  const write = (rel: string, body: string) => {
+    const p = path.join(dir, rel);
+    fs.mkdirSync(path.dirname(p), { recursive: true });
+    fs.writeFileSync(p, body);
+  };
+
+  const load = async () => {
+    const cg = await CodeGraph.init(dir, { silent: true });
+    await cg.indexAll();
+    const db = (cg as any).db.db;
+    const edges: { src: string; tgt: string; via: string }[] = db
+      .prepare(
+        `SELECT s.name src, t.name tgt, json_extract(e.metadata,'$.via') via
+         FROM edges e JOIN nodes s ON s.id = e.source JOIN nodes t ON t.id = e.target
+         WHERE json_extract(e.metadata,'$.synthesizedBy') = 'fn-pointer-dispatch'`
+      )
+      .all();
+    cg.close?.();
+    return edges;
+  };
+  const has = (edges: any[], src: string, tgt: string) => edges.some((e) => e.src === src && e.tgt === tgt);
+
+  it('bridges a {name, fn} command table dispatched through p->fn() (the git shape)', async () => {
+    write('cmd.c', `
+struct cmd { const char *name; int (*fn)(int argc); };
+static int cmd_add(int argc) { return argc + 1; }
+static int cmd_rm(int argc) { return argc - 1; }
+static int cmd_noop(int argc) { return argc; }   /* defined, NOT in the table */
+
+static struct cmd commands[] = {
+    { "add", cmd_add },
+    { "rm",  cmd_rm  },
+};
+
+int run_builtin(struct cmd *p, int argc) {
+    return p->fn(argc);
+}
+`);
+    const edges = await load();
+    expect(has(edges, 'run_builtin', 'cmd_add')).toBe(true);
+    expect(has(edges, 'run_builtin', 'cmd_rm')).toBe(true);
+    expect(edges.every((e) => e.via === 'cmd.fn')).toBe(true);
+    // PRECISION: a function not registered in the table is never a target.
+    expect(has(edges, 'run_builtin', 'cmd_noop')).toBe(false);
+  });
+
+  it('bridges designated-init (.handler = fn) and by-value c.fn() dispatch', async () => {
+    write('ops.c', `
+struct ops { int (*handler)(void); int size; };
+static int on_open(void) { return 1; }
+static struct ops the_ops = { .handler = on_open, .size = 4 };
+
+int dispatch(struct ops o) { return o.handler(); }
+`);
+    const edges = await load();
+    expect(has(edges, 'dispatch', 'on_open')).toBe(true);
+    expect(edges.every((e) => e.via === 'ops.handler')).toBe(true);
+  });
+
+  it('bridges the typedef-field + field←field double-hop (the hook_demo.c shape)', async () => {
+    write('hook.c', `
+typedef void (*hook_func)(void);
+struct hooks { hook_func func; };
+struct entry { const char *name; hook_func fn; };
+
+static void hk_set(void) {}
+static void hk_get(void) {}
+
+static const struct entry registry[] = {
+    { "set", hk_set },
+    { "get", hk_get },
+};
+
+void call(struct hooks *h, const struct entry *found) {
+    h->func = found->fn;   /* generic slot reassigned from the registry */
+    h->func();             /* dispatch through hooks.func */
+}
+`);
+    const edges = await load();
+    // hooks.func has no direct registration; it inherits entry.fn's targets via the h->func = found->fn assignment.
+    expect(has(edges, 'call', 'hk_set')).toBe(true);
+    expect(has(edges, 'call', 'hk_get')).toBe(true);
+  });
+
+  it('keys by (struct, field): distinct fn-pointer fields do not cross-bleed', async () => {
+    write('vtable.c', `
+struct io { int (*read)(void); int (*write)(int); };
+static int do_read(void) { return 0; }
+static int do_write(int x) { return x; }
+static struct io io = { .read = do_read, .write = do_write };
+
+int only_reads(struct io *p) { return p->read(); }
+`);
+    const edges = await load();
+    // only_reads dispatches ->read → do_read, and must NOT reach do_write (a different field).
+    expect(has(edges, 'only_reads', 'do_read')).toBe(true);
+    expect(has(edges, 'only_reads', 'do_write')).toBe(false);
+  });
+
+  it('does not bridge a plain data field, and no-ops on a struct with no dispatch', async () => {
+    write('data.c', `
+struct box { int count; int (*fn)(void); };
+static int helper(void) { return 0; }
+static struct box b = { .count = 3, .fn = helper };
+
+/* reads a data field and never dispatches the fn pointer */
+int total(struct box *x) { return x->count + 1; }
+`);
+    const edges = await load();
+    // No indirect dispatch happens, so there are no synthesized edges at all.
+    expect(edges.length).toBe(0);
+  });
+
+  it('is a no-op on a project with no C/C++ (clean control)', async () => {
+    write('app.js', `
+const handlers = { add: (x) => x + 1, rm: (x) => x - 1 };
+function run(name, x) { return handlers[name](x); }
+`);
+    const edges = await load();
+    expect(edges.length).toBe(0);
+  });
+});

+ 12 - 12
__tests__/dynamic-boundaries.test.ts

@@ -184,7 +184,7 @@ describe('codegraph_explore — dynamic boundaries', () => {
     const res = await handler.execute('codegraph_explore', { query: 'routeSave onSave' });
     const res = await handler.execute('codegraph_explore', { query: 'routeSave onSave' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
 
 
-    expect(text).toContain('## Dynamic boundaries');
+    expect(text).toContain('**Dynamic boundaries');
     expect(text).toContain('computed member call');
     expect(text).toContain('computed member call');
     expect(text).toMatch(/router\.ts:6/); // the exact dispatch site
     expect(text).toMatch(/router\.ts:6/); // the exact dispatch site
     expect(text).toContain('candidates for key `save`');
     expect(text).toContain('candidates for key `save`');
@@ -212,7 +212,7 @@ describe('codegraph_explore — dynamic boundaries', () => {
     const res = await handler.execute('codegraph_explore', { query: 'route onSave' });
     const res = await handler.execute('codegraph_explore', { query: 'route onSave' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
 
 
-    expect(text).toContain('## Dynamic boundaries');
+    expect(text).toContain('**Dynamic boundaries');
     expect(text).toContain('computed member call');
     expect(text).toContain('computed member call');
     expect(text).not.toContain('candidates for key'); // runtime key → no shortlist to claim
     expect(text).not.toContain('candidates for key'); // runtime key → no shortlist to claim
   });
   });
@@ -234,7 +234,7 @@ describe('codegraph_explore — dynamic boundaries', () => {
     // `processPayment` does not exist anywhere — only `route` resolves.
     // `processPayment` does not exist anywhere — only `route` resolves.
     const res = await handler.execute('codegraph_explore', { query: 'route processPayment' });
     const res = await handler.execute('codegraph_explore', { query: 'route processPayment' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
-    expect(text).toContain('## Dynamic boundaries');
+    expect(text).toContain('**Dynamic boundaries');
   });
   });
 
 
   it('renders a direct synthesized emit→handler hop as a dynamic-dispatch link (#687 criterion 1)', async () => {
   it('renders a direct synthesized emit→handler hop as a dynamic-dispatch link (#687 criterion 1)', async () => {
@@ -267,11 +267,11 @@ describe('codegraph_explore — dynamic boundaries', () => {
     const res = await handler.execute('codegraph_explore', { query: 'completeCheckout settleInvoice' });
     const res = await handler.execute('codegraph_explore', { query: 'completeCheckout settleInvoice' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
 
 
-    expect(text).toContain('## Dynamic-dispatch links among your symbols');
+    expect(text).toContain('**Dynamic-dispatch links among your symbols');
     expect(text).toMatch(/completeCheckout → settleInvoice/);
     expect(text).toMatch(/completeCheckout → settleInvoice/);
     expect(text).toContain('invoice.settled');
     expect(text).toContain('invoice.settled');
     // Connected via the synthesized edge — no boundary to announce.
     // Connected via the synthesized edge — no boundary to announce.
-    expect(text).not.toContain('## Dynamic boundaries');
+    expect(text).not.toContain('**Dynamic boundaries');
   });
   });
 
 
   it('never adds the section to a fully connected flow', async () => {
   it('never adds the section to a fully connected flow', async () => {
@@ -285,8 +285,8 @@ describe('codegraph_explore — dynamic boundaries', () => {
 
 
     const res = await handler.execute('codegraph_explore', { query: 'stepOne stepThree' });
     const res = await handler.execute('codegraph_explore', { query: 'stepOne stepThree' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
-    expect(text).toContain('## Flow');
-    expect(text).not.toContain('## Dynamic boundaries');
+    expect(text).toContain('**Flow');
+    expect(text).not.toContain('**Dynamic boundaries');
   });
   });
 
 
   it('python getattr dispatch surfaces with a prefix-key candidate', async () => {
   it('python getattr dispatch surfaces with a prefix-key candidate', async () => {
@@ -305,7 +305,7 @@ describe('codegraph_explore — dynamic boundaries', () => {
     const res = await handler.execute('codegraph_explore', { query: 'process handle_save' });
     const res = await handler.execute('codegraph_explore', { query: 'process handle_save' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
 
 
-    expect(text).toContain('## Dynamic boundaries');
+    expect(text).toContain('**Dynamic boundaries');
     expect(text).toContain('getattr');
     expect(text).toContain('getattr');
     expect(text).toContain('handle_save');
     expect(text).toContain('handle_save');
   });
   });
@@ -373,7 +373,7 @@ describe('codegraph_explore — interface dispatch', () => {
     const res = await handler.execute('codegraph_explore', { query: 'processRunExecutionData executeNode execute' });
     const res = await handler.execute('codegraph_explore', { query: 'processRunExecutionData executeNode execute' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
 
 
-    expect(text).toContain('## Interface dispatch (a named method has many implementations)');
+    expect(text).toContain('**Interface dispatch (a named method has many implementations)');
     expect(text).toMatch(/`execute` → runtime dispatch to \*\*9\*\* types implementing `INodeType`/);
     expect(text).toMatch(/`execute` → runtime dispatch to \*\*9\*\* types implementing `INodeType`/);
     // a couple of concrete targets, with file:line
     // a couple of concrete targets, with file:line
     expect(text).toMatch(/\b\w+Node\.execute` \(/);
     expect(text).toMatch(/\b\w+Node\.execute` \(/);
@@ -392,8 +392,8 @@ describe('codegraph_explore — interface dispatch', () => {
 
 
     const res = await handler.execute('codegraph_explore', { query: 'stepOne stepThree' });
     const res = await handler.execute('codegraph_explore', { query: 'stepOne stepThree' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
-    expect(text).toContain('## Flow');
-    expect(text).not.toContain('## Interface dispatch');
+    expect(text).toContain('**Flow');
+    expect(text).not.toContain('**Interface dispatch');
   });
   });
 
 
   it('stays SILENT when the interface family is below the polymorphism threshold (3 impls)', async () => {
   it('stays SILENT when the interface family is below the polymorphism threshold (3 impls)', async () => {
@@ -401,6 +401,6 @@ describe('codegraph_explore — interface dispatch', () => {
 
 
     const res = await handler.execute('codegraph_explore', { query: 'processRunExecutionData executeNode execute' });
     const res = await handler.execute('codegraph_explore', { query: 'processRunExecutionData executeNode execute' });
     const text = res.content[0].text as string;
     const text = res.content[0].text as string;
-    expect(text).not.toContain('## Interface dispatch');
+    expect(text).not.toContain('**Interface dispatch');
   });
   });
 });
 });

+ 1 - 1
__tests__/explore-blast-radius.test.ts

@@ -55,7 +55,7 @@ describe('codegraph_explore — blast radius', () => {
     const res = await handler.execute('codegraph_explore', { query: 'target' });
     const res = await handler.execute('codegraph_explore', { query: 'target' });
     const text = res.content[0].text;
     const text = res.content[0].text;
 
 
-    expect(text).toContain('### Blast radius');
+    expect(text).toContain('**Blast radius');
     expect(text).toContain('`target`');
     expect(text).toContain('`target`');
     expect(text).toMatch(/caller/); // a caller count is reported
     expect(text).toMatch(/caller/); // a caller count is reported
     // It names WHERE (the caller file) — not the caller's source body.
     // It names WHERE (the caller file) — not the caller's source body.

+ 3 - 2
__tests__/explore-corroboration-ranking.test.ts

@@ -28,11 +28,12 @@ import * as os from 'os';
 import CodeGraph from '../src/index';
 import CodeGraph from '../src/index';
 import { ToolHandler } from '../src/mcp/tools';
 import { ToolHandler } from '../src/mcp/tools';
 
 
-/** Paths that explore rendered as full-body `#### <path> —` source sections. */
+/** Paths that explore rendered as full-body ``**`<path>`** —`` source sections.
+ *  Headers are bold labels, not ATX headings (issue #778). */
 function sourcedFiles(text: string): string[] {
 function sourcedFiles(text: string): string[] {
   const out: string[] = [];
   const out: string[] = [];
   for (const line of text.split('\n')) {
   for (const line of text.split('\n')) {
-    const m = line.match(/^#### (.+?) —/);
+    const m = line.match(/^\*\*`(.+?)`\*\* —/);
     if (m) out.push(m[1].trim());
     if (m) out.push(m[1].trim());
   }
   }
   return out;
   return out;

+ 2 - 2
__tests__/explore-output-budget.test.ts

@@ -206,8 +206,8 @@ describe('codegraph_explore output respects the adaptive budget', () => {
     const text = result.content?.[0]?.text ?? '';
     const text = result.content?.[0]?.text ?? '';
     // Either there are relationships, or no edges were significant — both are fine.
     // Either there are relationships, or no edges were significant — both are fine.
     // We just want to confirm we did not accidentally gate it off.
     // We just want to confirm we did not accidentally gate it off.
-    const hasRelationships = text.includes('### Relationships');
-    const sourceFollowsHeader = text.indexOf('### Source Code') > 0;
+    const hasRelationships = text.includes('**Relationships');
+    const sourceFollowsHeader = text.indexOf('**Source Code') > 0;
     expect(hasRelationships || sourceFollowsHeader).toBe(true);
     expect(hasRelationships || sourceFollowsHeader).toBe(true);
   });
   });
 
 

+ 2 - 2
__tests__/explore-synth-constant-endpoints.test.ts

@@ -9,7 +9,7 @@
  * "### Relationships" section would have caught it, but that is disabled below 500 files.
  * "### Relationships" section would have caught it, but that is disabled below 500 files.
  * Net: on a small RTK app the synthesized edge existed in the graph yet was invisible to
  * Net: on a small RTK app the synthesized edge existed in the graph yet was invisible to
  * the agent. The fix feeds a `dynNamed` set (named non-callable endpoints that participate
  * the agent. The fix feeds a `dynNamed` set (named non-callable endpoints that participate
- * in a heuristic edge) to the tier-independent "## Dynamic-dispatch links" scan. This test
+ * in a heuristic edge) to the tier-independent "**Dynamic-dispatch links**" scan. This test
  * pins it on a deliberately tiny (<150-file) fixture so the Relationships gate is OFF and
  * pins it on a deliberately tiny (<150-file) fixture so the Relationships gate is OFF and
  * the dynamic-dispatch-links path is the ONLY thing that can surface the hop.
  * the dynamic-dispatch-links path is the ONLY thing that can surface the hop.
  */
  */
@@ -77,7 +77,7 @@ export const outerThunk = createAsyncThunk('app/outer', async (n: number, { disp
 
 
     // The synthesized hop now surfaces (was invisible: both endpoints `constant` AND the
     // The synthesized hop now surfaces (was invisible: both endpoints `constant` AND the
     // small-repo Relationships section is off).
     // small-repo Relationships section is off).
-    expect(text).toContain('## Dynamic-dispatch links among your symbols');
+    expect(text).toContain('**Dynamic-dispatch links among your symbols');
     expect(text).toMatch(/outerThunk\s+→\s+innerThunk/);
     expect(text).toMatch(/outerThunk\s+→\s+innerThunk/);
     // It reads as a dynamic-dispatch bridge with its wiring site, not a bare `calls`.
     // It reads as a dynamic-dispatch bridge with its wiring site, not a bare `calls`.
     expect(text).toMatch(/dynamic: redux thunk @/);
     expect(text).toMatch(/dynamic: redux thunk @/);

+ 57 - 0
__tests__/extraction.test.ts

@@ -2582,6 +2582,63 @@ std::unique_ptr<Widget> makeWidget() { return nullptr; }
     });
     });
   });
   });
 
 
+  describe('C++ macro-prefixed class/struct misparse (#946)', () => {
+    // An export/visibility macro before the class name plus a base clause
+    // (`class MACRO Name : public Base { … }`) makes tree-sitter read `class
+    // MACRO` as an elaborated type and the whole declaration as a
+    // function_definition named after the class, spanning the entire body — a
+    // phantom `function` that polluted callers/impact/blast-radius. It's dropped.
+    it('does not mint a phantom function for a macro-annotated class that inherits', () => {
+      const code = `#pragma once
+#define MAPCORE_EXPORT __attribute__((visibility("default")))
+
+class DataProvider {
+public:
+    virtual bool Request(void* param) = 0;
+};
+
+class MAPCORE_EXPORT LocalDataProvider : public DataProvider
+{
+public:
+    LocalDataProvider(int dataType);
+    virtual bool Request(void* param) override;
+};
+`;
+      // A header rich in C++ (class / public: / virtual) detects as C++ — the
+      // issue's exact scenario (a `.h` file). Guard it so a detection regression
+      // can't make this test pass for the wrong reason.
+      expect(detectLanguage('provider.h', code)).toBe('cpp');
+      const result = extractFromSource('provider.h', code);
+
+      // The misparse used to surface as `function | LocalDataProvider` spanning
+      // the whole class body — a false caller in the graph. It's gone now.
+      expect(
+        result.nodes.find((n) => n.name === 'LocalDataProvider' && n.kind === 'function')
+      ).toBeUndefined();
+
+      // The sibling class without the macro is unaffected — still a class.
+      expect(result.nodes.find((n) => n.name === 'DataProvider')?.kind).toBe('class');
+    });
+
+    it('drops the struct variant too, without dropping a genuine class', () => {
+      const code = `
+#define API __declspec(dllexport)
+struct API Widget : public Base { int x; };
+class Plain : public Base { public: int y; };
+`;
+      const result = extractFromSource('widget.cpp', code);
+
+      // `struct MACRO Name : Base { … }` misparses the same way — no phantom function.
+      expect(
+        result.nodes.find((n) => n.name === 'Widget' && n.kind === 'function')
+      ).toBeUndefined();
+
+      // A normal class with a base clause and no macro must still be a class — the
+      // drop is precise, not a blanket "class with inheritance" filter.
+      expect(result.nodes.find((n) => n.name === 'Plain')?.kind).toBe('class');
+    });
+  });
+
   describe('C/C++ imports', () => {
   describe('C/C++ imports', () => {
     it('should extract system include', () => {
     it('should extract system include', () => {
       const code = `#include <iostream>`;
       const code = `#include <iostream>`;

+ 156 - 0
__tests__/lombok.test.ts

@@ -0,0 +1,156 @@
+/**
+ * Lombok-generated member synthesis (Java, #912).
+ *
+ * Lombok generates getters/setters/builder/equals/hashCode/toString and the
+ * `log` field at compile time, so they never appear in the source AST. Without
+ * synthesis they're absent from the index and any `bean.getX()` / `Bean.builder()`
+ * / `log.info()` call resolves to nothing — call chains break silently. We
+ * synthesize the mechanical ones from the annotations + fields, mark them
+ * (`lombok` decorator + a docstring naming the source annotation), and they then
+ * resolve as ordinary call targets. These tests prove the synthesis, the call
+ * resolution that motivated it, and the precision boundaries (static fields
+ * skipped, hand-written members never overridden, a non-Lombok class is clean).
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import * as os from 'node:os';
+import { CodeGraph } from '../src';
+
+describe('lombok synthesis', () => {
+  let dir: string;
+  beforeEach(() => { dir = fs.mkdtempSync(path.join(os.tmpdir(), 'lombok-')); });
+  afterEach(() => { fs.rmSync(dir, { recursive: true, force: true }); });
+
+  const write = (rel: string, body: string) => {
+    const p = path.join(dir, rel);
+    fs.mkdirSync(path.dirname(p), { recursive: true });
+    fs.writeFileSync(p, body);
+  };
+
+  type Row = { name: string; kind: string; decorators: string | null; docstring: string | null; signature: string | null };
+  const load = async () => {
+    const cg = await CodeGraph.init(dir, { silent: true });
+    await cg.indexAll();
+    const db = (cg as any).db.db;
+    const nodes: Row[] = db.prepare(`SELECT name, kind, decorators, docstring, signature FROM nodes`).all();
+    const calls: { src: string; tgt: string }[] = db
+      .prepare(
+        `SELECT s.name src, t.name tgt FROM edges e
+         JOIN nodes s ON s.id = e.source JOIN nodes t ON t.id = e.target
+         WHERE e.kind = 'calls'`
+      )
+      .all();
+    cg.close?.();
+    return { nodes, calls };
+  };
+
+  const isLombok = (n: Row | undefined) => !!n && (n.decorators ?? '').includes('lombok');
+
+  it('synthesizes accessors that resolve as call targets, and the @Slf4j log field', async () => {
+    write('model/User.java', `package model;
+import lombok.Data;
+import lombok.Builder;
+import lombok.extern.slf4j.Slf4j;
+
+@Data
+@Builder
+@Slf4j
+public class User {
+    private String name;
+    private boolean active;
+    private static final int MAX = 10;
+}
+`);
+    write('svc/UserService.java', `package svc;
+import model.User;
+
+class UserService {
+    String describe(User user) {
+        user.setActive(true);
+        return user.getName();
+    }
+    User make() {
+        return User.builder();
+    }
+}
+`);
+
+    const { nodes, calls } = await load();
+    const byName = (name: string) => nodes.find((n) => n.name === name && isLombok(n));
+
+    // Accessors + Data contract + builder are synthesized and marked.
+    for (const m of ['getName', 'setName', 'isActive', 'setActive', 'builder', 'equals', 'hashCode', 'toString']) {
+      expect(isLombok(byName(m)), `expected synthesized ${m}`).toBe(true);
+    }
+    expect(byName('getName')!.docstring).toMatch(/Lombok-generated/);
+    expect(byName('getName')!.signature).toBe('String getName()');
+    expect(byName('isActive')!.signature).toBe('boolean isActive()'); // boolean → is-prefix
+    expect(byName('builder')!.signature).toContain('static ');
+
+    // @Slf4j → a `log` field.
+    expect(isLombok(nodes.find((n) => n.name === 'log' && n.kind === 'field'))).toBe(true);
+
+    // PRECISION: a static field gets no accessor.
+    expect(nodes.some((n) => n.name === 'getMAX' || n.name === 'getMax')).toBe(false);
+
+    // THE FIX: calls to Lombok-generated methods resolve to their synthesized target.
+    const resolved = (src: string, tgt: string) => calls.some((c) => c.src === src && c.tgt === tgt);
+    expect(resolved('describe', 'getName')).toBe(true);
+    expect(resolved('describe', 'setActive')).toBe(true);
+    expect(resolved('make', 'builder')).toBe(true);
+  });
+
+  it('never overrides a hand-written accessor', async () => {
+    write('model/Account.java', `package model;
+import lombok.Getter;
+
+@Getter
+public class Account {
+    private int balance;
+    private String owner;
+
+    // explicit getter — Lombok skips it, so must we
+    public int getBalance() { return balance < 0 ? 0 : balance; }
+}
+`);
+    const { nodes } = await load();
+    const getBalance = nodes.filter((n) => n.name === 'getBalance');
+    expect(getBalance.length).toBe(1);           // exactly one, not duplicated
+    expect(isLombok(getBalance[0])).toBe(false); // the hand-written one survives
+    // the un-shadowed field still gets its synthesized getter
+    expect(isLombok(nodes.find((n) => n.name === 'getOwner'))).toBe(true);
+  });
+
+  it('field-level @Getter/@Setter and final-field rules', async () => {
+    write('model/Box.java', `package model;
+import lombok.Getter;
+import lombok.Setter;
+
+public class Box {
+    @Getter @Setter private String label;
+    @Getter private final long id;     // final → getter only, no setter
+    private int hidden;                // no annotation → nothing
+}
+`);
+    const { nodes } = await load();
+    expect(isLombok(nodes.find((n) => n.name === 'getLabel'))).toBe(true);
+    expect(isLombok(nodes.find((n) => n.name === 'setLabel'))).toBe(true);
+    expect(isLombok(nodes.find((n) => n.name === 'getId'))).toBe(true);
+    expect(nodes.some((n) => n.name === 'setId')).toBe(false);     // final → no setter
+    expect(nodes.some((n) => n.name === 'getHidden')).toBe(false); // un-annotated → nothing
+  });
+
+  it('produces no synthesized members for a plain Java class (clean control)', async () => {
+    write('model/Plain.java', `package model;
+
+public class Plain {
+    private int value;
+    public int getValue() { return value; }
+    public void setValue(int v) { this.value = v; }
+}
+`);
+    const { nodes } = await load();
+    expect(nodes.some((n) => isLombok(n))).toBe(false);
+  });
+});

+ 51 - 0
__tests__/mcp-catchup-gate.test.ts

@@ -110,6 +110,57 @@ describe('MCP catch-up gate', () => {
     expect(cg.getStats().fileCount).toBe(0);
     expect(cg.getStats().fileCount).toBe(0);
   });
   });
 
 
+  it('does not hang the first call when catch-up runs past the timeout (#905)', async () => {
+    // The issue #905 hang: on a huge repo the post-open reconcile takes minutes,
+    // and gating the first tool call on all of it reads as a multi-minute hang.
+    // With the time-box, the call is served promptly and the reconcile finishes
+    // in the background.
+    const prev = process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS;
+    process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS = '50';
+    let timer: NodeJS.Timeout | undefined;
+    try {
+      let gateResolved = false;
+      const gate = new Promise<void>((resolve) => {
+        timer = setTimeout(() => { gateResolved = true; resolve(); }, 5000);
+      });
+      handler.setCatchUpGate(gate);
+
+      const started = Date.now();
+      const res = await handler.execute('codegraph_search', { query: 'survivor' });
+      const elapsed = Date.now() - started;
+
+      expect(res.isError).toBeFalsy();
+      expect(res.content[0].text).toMatch(/survivor/);
+      // Served on the timeout (~50ms), NOT after the 5s reconcile.
+      expect(gateResolved).toBe(false);
+      expect(elapsed).toBeLessThan(2000);
+    } finally {
+      if (timer) clearTimeout(timer);
+      if (prev === undefined) delete process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS;
+      else process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS = prev;
+    }
+  });
+
+  it('CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS=0 restores the unbounded wait', async () => {
+    const prev = process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS;
+    process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS = '0';
+    try {
+      let gateResolved = false;
+      const gate = new Promise<void>((resolve) => {
+        setTimeout(() => { gateResolved = true; resolve(); }, 80);
+      });
+      handler.setCatchUpGate(gate);
+
+      const res = await handler.execute('codegraph_search', { query: 'survivor' });
+      // With the time-box disabled, the call waits for the full reconcile.
+      expect(gateResolved).toBe(true);
+      expect(res.isError).toBeFalsy();
+    } finally {
+      if (prev === undefined) delete process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS;
+      else process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS = prev;
+    }
+  });
+
   it('gate that rejects does not break the tool call', async () => {
   it('gate that rejects does not break the tool call', async () => {
     // A catch-up sync failure (lock contention, transient FS error) must
     // A catch-up sync failure (lock contention, transient FS error) must
     // not poison tool dispatch — the engine logs it, the handler proceeds.
     // not poison tool dispatch — the engine logs it, the handler proceeds.

+ 2 - 2
__tests__/mcp-staleness-banner.test.ts

@@ -175,7 +175,7 @@ describe('MCP staleness banner', () => {
 
 
     const res = await handler.execute('codegraph_status', {});
     const res = await handler.execute('codegraph_status', {});
     const text = res.content[0].text;
     const text = res.content[0].text;
-    expect(text).toContain('### Pending sync:');
+    expect(text).toContain('**Pending sync:');
     expect(text).toContain('src/charlie-only.ts');
     expect(text).toContain('src/charlie-only.ts');
     // Status embeds the info first-class, so the auto-banner is suppressed.
     // Status embeds the info first-class, so the auto-banner is suppressed.
     expect(text.startsWith('⚠️')).toBe(false);
     expect(text.startsWith('⚠️')).toBe(false);
@@ -204,7 +204,7 @@ describe('MCP staleness banner', () => {
 
 
     const res = await handler.execute('codegraph_status', {});
     const res = await handler.execute('codegraph_status', {});
     const text = res.content[0].text;
     const text = res.content[0].text;
-    expect(text).toContain('### Auto-sync disabled:');
+    expect(text).toContain('**Auto-sync disabled:');
     expect(text).toContain('OS watch/file limit exhausted');
     expect(text).toContain('OS watch/file limit exhausted');
     // status renders the notice inline, so the auto-banner is not also prepended.
     // status renders the notice inline, so the auto-banner is not also prepended.
     expect(text.startsWith('⚠️')).toBe(false);
     expect(text.startsWith('⚠️')).toBe(false);

+ 57 - 0
__tests__/multi-repo-workspace.test.ts

@@ -131,6 +131,42 @@ describe('multi-repo workspaces (#514)', () => {
     expect(files).toContain('vendored/lib.ts');
     expect(files).toContain('vendored/lib.ts');
   });
   });
 
 
+  it('skips a submodule worktree instead of indexing it as a duplicate (#945)', () => {
+    // A worktree OF A SUBMODULE points its `.git` into
+    // `.git/modules/<module>/worktrees/<name>` — not the top-level repo's
+    // `.git/worktrees/`. The detector used to miss that extra `modules/<name>`
+    // segment, so the worktree fell through to "embedded" and every symbol it
+    // shared with the real submodule checkout got indexed twice. The submodule's
+    // own checkout (`.git/modules/<module>`, no `worktrees/`) is distinct code
+    // and must stay indexed (#514).
+    const upstream = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-945-up-'));
+    try {
+      // The repo that becomes the submodule's origin.
+      write(path.join(upstream, 'lib.ts'), 'export function libFn() { return 1; }\n');
+      makeRepo(upstream);
+
+      write(path.join(ws, 'src/app.ts'), 'export function app() { return 1; }\n');
+      write(path.join(ws, '.gitignore'), '.worktrees/\n');
+      git(ws, 'init', '-q');
+      // protocol.file.allow=always: modern git refuses a local-path submodule otherwise.
+      git(ws, '-c', 'protocol.file.allow=always', 'submodule', 'add', '-q', upstream, 'common');
+      git(ws, '-c', 'user.email=t@t', '-c', 'user.name=t', 'commit', '-qm', 'add submodule');
+
+      // A worktree of the submodule, under the gitignored .worktrees/ — its `.git`
+      // points into `.git/modules/common/worktrees/<name>`.
+      git(path.join(ws, 'common'), 'worktree', 'add', '-q', '../.worktrees/common-feature', '-b', 'feature');
+
+      const files = scanDirectory(ws);
+      expect(files).toContain('src/app.ts');
+      // The real submodule checkout is distinct code — still indexed (#514).
+      expect(files).toContain('common/lib.ts');
+      // The submodule worktree is a duplicate working view — never indexed (#945).
+      expect(files.some((f) => f.includes('.worktrees'))).toBe(false);
+    } finally {
+      fs.rmSync(upstream, { recursive: true, force: true });
+    }
+  });
+
   it('non-git workspace: walks children and respects each child own .gitignore', () => {
   it('non-git workspace: walks children and respects each child own .gitignore', () => {
     write(path.join(ws, 'proj-a/src/auth.ts'), 'export function login() {}\n');
     write(path.join(ws, 'proj-a/src/auth.ts'), 'export function login() {}\n');
     write(path.join(ws, 'proj-a/build/out.ts'), 'export function generated() {}\n');
     write(path.join(ws, 'proj-a/build/out.ts'), 'export function generated() {}\n');
@@ -203,6 +239,27 @@ describe('multi-repo workspaces (#514)', () => {
     expect(scope.ignores('src/app.ts')).toBe(false);
     expect(scope.ignores('src/app.ts')).toBe(false);
   });
   });
 
 
+  it('buildScopeIgnore: indexed root is itself a gitignored subdir of an enclosing repo (#936)', () => {
+    // `child/` is NOT its own repo, so `git` resolves the ENCLOSING repo from
+    // inside it — and `git ls-files --directory`, whose cwd is then a wholly
+    // ignored directory, emits the literal `./` ("this entire directory").
+    // That sentinel used to reach the `ignore` matcher and throw
+    // ("path should be a `path.relative()`d string, but got "./""), aborting
+    // buildScopeIgnore → the MCP daemon's watcher never started and auto-sync
+    // silently stalled until a manual `codegraph sync`.
+    write(path.join(ws, 'child/src/a.ts'), 'export const x = 1;\n');
+    write(path.join(ws, '.gitignore'), '/child/\n');
+    makeRepo(ws);
+
+    const child = path.join(ws, 'child');
+    // The crux: building scope for the ignored subdir must not throw.
+    const scope = buildScopeIgnore(child);
+    // The subdir's own source is watchable/indexable, not ignored.
+    expect(scope.ignores('src/a.ts')).toBe(false);
+    // And the `./` self entry must not be mistaken for a nested embedded repo.
+    expect(discoverEmbeddedRepoRoots(child)).toEqual([]);
+  });
+
   it('sync picks up a change inside a gitignored embedded repo', async () => {
   it('sync picks up a change inside a gitignored embedded repo', async () => {
     write(path.join(ws, 'packages/proj-a/src/auth.ts'), 'export function login() { return 1; }\n');
     write(path.join(ws, 'packages/proj-a/src/auth.ts'), 'export function login() { return 1; }\n');
     makeRepo(path.join(ws, 'packages/proj-a'));
     makeRepo(path.join(ws, 'packages/proj-a'));

+ 1 - 1
__tests__/node-file-view.test.ts

@@ -99,7 +99,7 @@ describe('codegraph_node file-view (Read replacement)', () => {
 
 
   it('symbolsOnly returns the structural map, not the source', async () => {
   it('symbolsOnly returns the structural map, not the source', async () => {
     const out = await text({ file: 'a.ts', symbolsOnly: true });
     const out = await text({ file: 'a.ts', symbolsOnly: true });
-    expect(out).toContain('### Symbols');
+    expect(out).toContain('**Symbols');
     expect(out).toContain('helper');
     expect(out).toContain('helper');
     expect(out).toContain('Widget');
     expect(out).toContain('Widget');
     expect(out).not.toContain('return x + 1'); // bodies are NOT included in the map
     expect(out).not.toContain('return x + 1'); // bodies are NOT included in the map

+ 4 - 4
__tests__/offload.test.ts

@@ -231,16 +231,16 @@ describe('reasoning offload', () => {
   describe('stripAgentDirectives', () => {
   describe('stripAgentDirectives', () => {
     it('drops the agent-directed header but keeps source sections', () => {
     it('drops the agent-directed header but keeps source sections', () => {
       const ctx = [
       const ctx = [
-        '## Exploration: how does X work',
+        '**Exploration: how does X work**',
         'Found 12 symbols across 3 files.',
         'Found 12 symbols across 3 files.',
         '',
         '',
-        '#### src/a.ts — foo(function)',
+        '**`src/a.ts`** — foo(function)',
         'code body',
         'code body',
       ].join('\n');
       ].join('\n');
       const stripped = stripAgentDirectives(ctx);
       const stripped = stripAgentDirectives(ctx);
-      expect(stripped).not.toContain('## Exploration:');
+      expect(stripped).not.toContain('**Exploration:');
       expect(stripped).not.toContain('Found 12 symbols');
       expect(stripped).not.toContain('Found 12 symbols');
-      expect(stripped).toContain('#### src/a.ts');
+      expect(stripped).toContain('**`src/a.ts`');
       expect(stripped).toContain('code body');
       expect(stripped).toContain('code body');
     });
     });
   });
   });

+ 4 - 1
__tests__/same-name-disambiguation.test.ts

@@ -99,7 +99,10 @@ describe('same-named symbols across apps (#764)', () => {
     expect(out).toContain('apps/billing/src/users/user.service.ts');
     expect(out).toContain('apps/billing/src/users/user.service.ts');
     // …and the billing section must list the billing controller, not admin's.
     // …and the billing section must list the billing controller, not admin's.
     const billingSection = out.slice(out.indexOf('apps/billing/src/users/user.service.ts'));
     const billingSection = out.slice(out.indexOf('apps/billing/src/users/user.service.ts'));
-    const billingBody = billingSection.slice(0, billingSection.indexOf('###', 3) > 0 ? billingSection.indexOf('###', 3) : undefined);
+    // The next definition heading is a line-start bold label (issue #778: ATX `###`
+    // headings became `**…**`); billingSection starts mid-heading, so `\n**` finds it.
+    const nextDef = billingSection.indexOf('\n**');
+    const billingBody = billingSection.slice(0, nextDef > 0 ? nextDef : undefined);
     expect(billingBody).toContain('apps/billing/src/users/user.controller.ts');
     expect(billingBody).toContain('apps/billing/src/users/user.controller.ts');
     expect(billingBody).not.toContain('apps/admin/src/users/user.controller.ts');
     expect(billingBody).not.toContain('apps/admin/src/users/user.controller.ts');
   });
   });

+ 125 - 0
__tests__/sync.test.ts

@@ -304,6 +304,131 @@ describe('Sync Module', () => {
     });
     });
   });
   });
 
 
+  // Incremental sync's git fast path used to consume `git status` output without
+  // the ignore matcher the full index applies — so a committed dependency dir
+  // (built-in default exclude) or a tracked file under a .gitignored dir would
+  // leak into the index via `sync`, then vanish on the next `index --force`. The
+  // git fast path must exclude exactly what the full scan does. (#766)
+  describe('Incremental sync honors the ignore matcher (#766)', () => {
+    let testDir: string;
+    let cg: CodeGraph;
+
+    function git(...args: string[]) {
+      execFileSync('git', args, { cwd: testDir, stdio: 'pipe' });
+    }
+
+    beforeEach(async () => {
+      testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-766-'));
+
+      git('init');
+      git('config', 'user.email', 'test@test.com');
+      git('config', 'user.name', 'Test');
+
+      // Real project source — must keep flowing through sync untouched.
+      fs.mkdirSync(path.join(testDir, 'src'));
+      fs.writeFileSync(
+        path.join(testDir, 'src', 'index.ts'),
+        `export function hello() { return 'world'; }`
+      );
+
+      // A COMMITTED vendor/ dir: tracked in git, but a built-in default exclude
+      // git knows nothing about. git status happily reports edits to it.
+      fs.mkdirSync(path.join(testDir, 'vendor'));
+      fs.writeFileSync(
+        path.join(testDir, 'vendor', 'lib.ts'),
+        `export function vendoredHelper() { return 1; }`
+      );
+
+      // A tracked file inside a .gitignored dir: gitignore is a no-op for files
+      // already committed, so git status still reports modifications to it.
+      fs.writeFileSync(path.join(testDir, '.gitignore'), 'generated/\n');
+      fs.mkdirSync(path.join(testDir, 'generated'));
+      fs.writeFileSync(
+        path.join(testDir, 'generated', 'out.ts'),
+        `export function generatedThing() { return 2; }`
+      );
+
+      git('add', '-A'); // .gitignore + src/ + vendor/ (generated/ is now ignored)
+      git('add', '-f', 'generated/out.ts'); // force the ignored-but-tracked file in
+      git('commit', '-m', 'initial');
+
+      cg = CodeGraph.initSync(testDir, {
+        config: { include: ['**/*.ts'], exclude: [] },
+      });
+      await cg.indexAll();
+    });
+
+    afterEach(() => {
+      if (cg) cg.destroy();
+      if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true });
+    });
+
+    it('the full index excludes both (baseline the sync path must match)', () => {
+      expect(cg.searchNodes('hello').length).toBeGreaterThan(0);
+      expect(cg.searchNodes('vendoredHelper')).toHaveLength(0);
+      expect(cg.searchNodes('generatedThing')).toHaveLength(0);
+    });
+
+    it('does not re-index a modified tracked file in a built-in excluded dir (vendor/)', () => {
+      fs.writeFileSync(
+        path.join(testDir, 'vendor', 'lib.ts'),
+        `export function vendoredHelper() { return 999; }`
+      );
+      const changes = cg.getChangedFiles();
+      expect(changes.modified).not.toContain('vendor/lib.ts');
+      expect(changes.added).not.toContain('vendor/lib.ts');
+    });
+
+    it('does not re-index a modified tracked file in a .gitignored dir', () => {
+      fs.writeFileSync(
+        path.join(testDir, 'generated', 'out.ts'),
+        `export function generatedThing() { return 999; }`
+      );
+      const changes = cg.getChangedFiles();
+      expect(changes.modified).not.toContain('generated/out.ts');
+      expect(changes.added).not.toContain('generated/out.ts');
+    });
+
+    it('does not index a new untracked file in an excluded dir', () => {
+      // vendor/ isn't in .gitignore, so an untracked file there surfaces as `??`
+      // in git status — it must still be filtered to match the full index.
+      fs.writeFileSync(
+        path.join(testDir, 'vendor', 'extra.ts'),
+        `export function vendoredExtra() { return 3; }`
+      );
+      const changes = cg.getChangedFiles();
+      expect(changes.added).not.toContain('vendor/extra.ts');
+    });
+
+    it('status (getChangedFiles) agrees with sync — no phantom pending changes', async () => {
+      // The user-visible symptom today: `codegraph status` reads getChangedFiles
+      // and reports a vendor edit as a pending change that `sync` (a filesystem
+      // reconcile) then never indexes — so the count never clears. Both must now
+      // agree that nothing happened.
+      fs.writeFileSync(
+        path.join(testDir, 'vendor', 'lib.ts'),
+        `export function vendoredHelper() { return 999; }`
+      );
+      const changes = cg.getChangedFiles();
+      expect(changes.added).toHaveLength(0);
+      expect(changes.modified).toHaveLength(0);
+
+      const result = await cg.sync();
+      expect(result.filesModified).toBe(0);
+      expect(result.changedFilePaths ?? []).not.toContain('vendor/lib.ts');
+      expect(cg.searchNodes('vendoredHelper')).toHaveLength(0);
+    });
+
+    it('still syncs a normal modified source file (no over-filtering)', () => {
+      fs.writeFileSync(
+        path.join(testDir, 'src', 'index.ts'),
+        `export function hello() { return 'changed'; }`
+      );
+      const changes = cg.getChangedFiles();
+      expect(changes.modified).toContain('src/index.ts');
+    });
+  });
+
   describe('Cross-file module-attribute caller edges survive callee re-index (#899)', () => {
   describe('Cross-file module-attribute caller edges survive callee re-index (#899)', () => {
     let testDir: string;
     let testDir: string;
     let cg: CodeGraph;
     let cg: CodeGraph;

+ 1 - 0
docs/design/dispatch-synthesizer-backlog.md

@@ -91,6 +91,7 @@ Status legend (matches the playbook): ✅ done+validated · 🟡 shipped but und
 | MediatR | `mediatr-dispatch` | ✅ **shipped (2026-06-20)** — `_mediator.Send(x)`/`.Publish(x)` → the `Handle` of `IRequestHandler<X>`/`INotificationHandler<X>` by request type; 100% precision jasontaylor (9) / eShop (9, variable-passed), 0 on Newtonsoft control. Type from class base-list (C# has no signature) + arg resolved inline/local/param; receiver + handler-map gates. |
 | MediatR | `mediatr-dispatch` | ✅ **shipped (2026-06-20)** — `_mediator.Send(x)`/`.Publish(x)` → the `Handle` of `IRequestHandler<X>`/`INotificationHandler<X>` by request type; 100% precision jasontaylor (9) / eShop (9, variable-passed), 0 on Newtonsoft control. Type from class base-list (C# has no signature) + arg resolved inline/local/param; receiver + handler-map gates. |
 | Sidekiq | `sidekiq-dispatch` | ✅ **shipped (2026-06-20)** — `W.perform_async/_in/_at(…)` → `W#perform`, gated on `include Sidekiq::Job`/`Worker`; 100% precision loomio (47) / forem (142, both aliases), 0 on jekyll control. Name-keyed; namespaced collisions disambiguated by qualified name; ActiveJob `perform_later` excluded. |
 | Sidekiq | `sidekiq-dispatch` | ✅ **shipped (2026-06-20)** — `W.perform_async/_in/_at(…)` → `W#perform`, gated on `include Sidekiq::Job`/`Worker`; 100% precision loomio (47) / forem (142, both aliases), 0 on jekyll control. Name-keyed; namespaced collisions disambiguated by qualified name; ActiveJob `perform_later` excluded. |
 | Laravel events | `laravel-event` | ✅ **shipped (2026-06-21)** — `event(new XEvent)` → each listener's `handle`, via typed `handle(XEvent $e)` (auto-discovery, union-split) AND the `$listen` map (covers untyped handles); 100% precision koel (9, `$listen`) / firefly (141, auto-discovery), 0 on guzzle control. Jobs excluded (they use `::dispatch`). |
 | Laravel events | `laravel-event` | ✅ **shipped (2026-06-21)** — `event(new XEvent)` → each listener's `handle`, via typed `handle(XEvent $e)` (auto-discovery, union-split) AND the `$listen` map (covers untyped handles); 100% precision koel (9, `$listen`) / firefly (141, auto-discovery), 0 on guzzle control. Jobs excluded (they use `::dispatch`). |
+| C/C++ fn-pointer dispatch | `fn-pointer-dispatch` | ✅ **shipped (2026-06-22)** — FIRST C / systems-language member (#932). Keyed by **(struct type, fn-pointer field)**: a fn registered to `S.field` (positional init matched by field index, designated `.field=fn`, or `x->field=fn`) ← linked → an indirect dispatch `recv->field(…)` whose receiver resolves to `S` (param/local type, else unique-field fallback). Source-read synth (`c-fnptr-synthesizer.ts`, regex over `ctx.readFile`), NOT extraction — handles the typedef'd field (`hook_func func`) + the **field←field double-hop** (`h->func = found->fn`, the issue's `hook_demo.c` shape). Covers BOTH the command-table idiom (Shape 1) and the ops-struct/vtable idiom (Shape 2) with the same key. Validated: **git 502** (`run_builtin→cmd_*` + 7 real vtables), **redis 357** (`dictType.hashFunction`, conn vtable), **curl 478** (`Curl_cwtype.do_init→{deflate,gzip,brotli,zstd}_do_init`); **0 non-function targets** everywhere, node-stable (pure edge synth), **0 on lua** (its `{name,fn}` tables register into the VM — no C indirect call → correctly nothing to bridge). **Deferred:** direct fn-pointer *variables* (`fp=f; fp()` — not field-keyed), array-of-fn-pointers without a struct, C++ *class* fn-pointer fields (virtual dispatch already covered by `interface-impl`/`cpp-override`), and macro-built tables (redis `MAKE_CMD(…)` proc arg lives inside a macro call, not a struct initializer, so `redisCommand.proc` registrations are unbridged). |
 | (see playbook §6 / `callback-synthesizer.ts` for the other ~20 channels) | | |
 | (see playbook §6 / `callback-synthesizer.ts` for the other ~20 channels) | | |
 
 
 ### redux-thunk follow-ups (found by the n>1 validation — this is exactly what it's for)
 ### redux-thunk follow-ups (found by the n>1 validation — this is exactly what it's for)

+ 1 - 1
package.json

@@ -1,7 +1,7 @@
 {
 {
   "name": "@colbymchenry/codegraph",
   "name": "@colbymchenry/codegraph",
   "version": "1.0.1",
   "version": "1.0.1",
-  "description": "Supercharge Claude Code with semantic code intelligence. 94% fewer tool calls • 77% faster exploration • 100% local.",
+  "description": "Supercharge AI coding agents with semantic code intelligence — surgical context, fewer tool calls, faster answers. 100% local.",
   "main": "dist/index.js",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "types": "dist/index.d.ts",
   "bin": {
   "bin": {

+ 1 - 1
scripts/agent-eval/offload-eval-metrics.mjs

@@ -48,7 +48,7 @@ for (const line of lines) {
       // "### Referenced source — verbatim" appendix). A refs call that cited nothing
       // "### Referenced source — verbatim" appendix). A refs call that cited nothing
       // valid falls back to RAW source, which is correctly counted as a raw explore below.
       // valid falls back to RAW source, which is correctly counted as a raw explore below.
       if (/Synthesized by CodeGraph|### Referenced source — verbatim/.test(text)) { offloadAnswers.push(text); exploreResults++; }
       if (/Synthesized by CodeGraph|### Referenced source — verbatim/.test(text)) { offloadAnswers.push(text); exploreResults++; }
-      else if (/Found \d+ symbols? across|## Exploration:/.test(text)) exploreResults++;
+      else if (/Found \d+ symbols? across|\*\*Exploration:/.test(text)) exploreResults++;
     }
     }
   }
   }
   if (ev.type === 'result') result = ev;
   if (ev.type === 'result') result = ev;

+ 1 - 1
scripts/agent-eval/probe-explore.mjs

@@ -36,5 +36,5 @@ console.log(text);
 console.error('\n--- PROBE STATS ---');
 console.error('\n--- PROBE STATS ---');
 console.error('output chars:', text.length);
 console.error('output chars:', text.length);
 console.error('triggerRender body present (-> setState({})):', /triggerRender[\s\S]{0,400}setState\(\{\}\)/.test(text));
 console.error('triggerRender body present (-> setState({})):', /triggerRender[\s\S]{0,400}setState\(\{\}\)/.test(text));
-console.error('App.tsx in source section:', /#### .*App\.tsx —/.test(text));
+console.error('App.tsx in source section:', /\*\*`.*App\.tsx`\*\* —/.test(text));
 try { cg.close?.(); } catch {}
 try { cg.close?.(); } catch {}

+ 1 - 1
site/src/content/docs/core-concepts/how-it-works.md

@@ -21,7 +21,7 @@ files → Extraction (tree-sitter) → DB (nodes/edges/files)
 
 
 ## 2. Storage
 ## 2. Storage
 
 
-Everything goes into a local SQLite database (`.codegraph/codegraph.db`) with FTS5 full-text search. CodeGraph uses native `better-sqlite3` when available and transparently falls back to a WASM backend; `codegraph status` shows which is live.
+Everything goes into a local SQLite database (`.codegraph/codegraph.db`) with FTS5 full-text search. CodeGraph opens it in WAL mode through `node:sqlite`, the SQLite module built into the Node runtime it bundles.
 
 
 ## 3. Resolution
 ## 3. Resolution
 
 

+ 1 - 1
site/src/content/docs/core-concepts/resolution.md

@@ -25,6 +25,6 @@ Static parsing misses computed and indirect calls, so flows can break at dynamic
 - `EventEmitter` channels
 - `EventEmitter` channels
 - React re-render (`setState` → `render`)
 - React re-render (`setState` → `render`)
 - JSX child (`render` → child component)
 - JSX child (`render` → child component)
-- Django ORM descriptors
+- Interface → implementation dispatch
 
 
 Every synthesized edge is marked `provenance: 'heuristic'` with the site that wired it, and is shown inline wherever a path crosses it.
 Every synthesized edge is marked `provenance: 'heuristic'` with the site that wired it, and is shown inline wherever a path crosses it.

+ 5 - 4
site/src/content/docs/getting-started/installation.md

@@ -14,9 +14,10 @@ The installer will:
 - Ask which agent(s) to configure — auto-detecting installed ones from **Claude Code**, **Cursor**, **Codex CLI**, **opencode**, **Hermes Agent**, **Gemini CLI**, **Antigravity IDE**, and **Kiro**.
 - Ask which agent(s) to configure — auto-detecting installed ones from **Claude Code**, **Cursor**, **Codex CLI**, **opencode**, **Hermes Agent**, **Gemini CLI**, **Antigravity IDE**, and **Kiro**.
 - Prompt to install `codegraph` on your `PATH` (so agents can launch the MCP server).
 - Prompt to install `codegraph` on your `PATH` (so agents can launch the MCP server).
 - Ask whether configs apply to all your projects or just this one.
 - Ask whether configs apply to all your projects or just this one.
-- Write each chosen agent's MCP server config plus an instructions file (e.g. `CLAUDE.md`, `.cursor/rules/codegraph.mdc`, `~/.codex/AGENTS.md`).
+- Write each chosen agent's MCP server config, plus a small marker-fenced CodeGraph section in the agent's instructions file (`CLAUDE.md` / `AGENTS.md` / `GEMINI.md`). Cursor and Kiro get the MCP config only. Everything written here is removed cleanly by `codegraph uninstall`.
 - Set up auto-allow permissions when Claude Code is one of the targets.
 - Set up auto-allow permissions when Claude Code is one of the targets.
-- Initialize your current project (local installs only).
+
+The installer **wires up your agents only — it does not index your code.** After it finishes, build each project's graph yourself with `codegraph init` (step 3 below).
 
 
 ## Non-interactive (scripting / CI)
 ## Non-interactive (scripting / CI)
 
 
@@ -43,10 +44,10 @@ Restart your agent (Claude Code / Cursor / Codex CLI / opencode / Hermes Agent /
 
 
 ```bash
 ```bash
 cd your-project
 cd your-project
-codegraph init -i
+codegraph init
 ```
 ```
 
 
-This builds the per-project knowledge graph index and wires up any project-local agent surfaces, so a single global `codegraph install` works in every project you open.
+`codegraph init` creates the local `.codegraph/` directory and builds the full graph in the same step — one command. A single global `codegraph install` covers every project; you run `codegraph init` once per project.
 
 
 ## Supported platforms
 ## Supported platforms
 
 

+ 7 - 8
site/src/content/docs/getting-started/introduction.md

@@ -1,6 +1,6 @@
 ---
 ---
 title: Introduction
 title: Introduction
-description: What CodeGraph is, and why it makes AI coding agents faster and cheaper.
+description: What CodeGraph is, and why it makes AI coding agents faster and more precise.
 ---
 ---
 
 
 CodeGraph is a **local-first code-intelligence tool**. It parses your codebase with [tree-sitter](https://tree-sitter.github.io/), stores every symbol, edge, and file in a local SQLite database, and exposes the result as a queryable **knowledge graph** — over the [Model Context Protocol (MCP)](/codegraph/reference/mcp-server/), a CLI, and a TypeScript library.
 CodeGraph is a **local-first code-intelligence tool**. It parses your codebase with [tree-sitter](https://tree-sitter.github.io/), stores every symbol, edge, and file in a local SQLite database, and exposes the result as a queryable **knowledge graph** — over the [Model Context Protocol (MCP)](/codegraph/reference/mcp-server/), a CLI, and a TypeScript library.
@@ -9,16 +9,15 @@ It exists to make AI coding agents — Claude Code, Cursor, Codex CLI, opencode,
 
 
 ## Why it matters
 ## Why it matters
 
 
-When an agent explores a codebase, it spends most of its budget on *discovery* — finding the right files before it can read them. CodeGraph removes that step: symbol relationships, call graphs, and structure are already indexed.
+When an agent explores a codebase, it spends most of its budget on *discovery* — finding the right files before it can read them. CodeGraph removes that step: it hands the agent the exact code it needs in one call, so symbol relationships, call graphs, and structure don't have to be rebuilt file by file.
 
 
-Tested across 7 real-world open-source codebases (median of 4 runs per arm), giving an agent CodeGraph was on average:
+The universal win is **surgical context and speed** — fewer tool calls, faster answers, on every codebase. Tested across 7 real-world open-source codebases (median of 4 runs per arm), giving an agent CodeGraph meant, regardless of repo size:
 
 
-- **35% cheaper**
-- **57% fewer tokens**
-- **46% faster**
-- **71% fewer tool calls**
+- **58% fewer tool calls**
+- **22% faster**
+- **file reads cut to ~zero**
 
 
-The gains scale with codebase size — on large repos the agent answers from the index with **zero file reads**.
+Token and dollar savings are real too, but they're the **scale-dependent bonus** that shows up on large, tangled codebases run at volume — small and noisy on a modest repo, material only once the codebase (and the team) gets big.
 
 
 ## What's in the graph
 ## What's in the graph
 
 

+ 11 - 8
site/src/content/docs/getting-started/quickstart.md

@@ -5,7 +5,9 @@ description: Get up and running with CodeGraph in seconds.
 
 
 Get up and running with CodeGraph in seconds.
 Get up and running with CodeGraph in seconds.
 
 
-## No Node.js required — one command grabs the right build for your OS
+## 1. Install the CLI
+
+No Node.js required — one command grabs the right build for your OS:
 
 
 ```bash
 ```bash
 # macOS / Linux
 # macOS / Linux
@@ -15,22 +17,23 @@ curl -fsSL https://raw.githubusercontent.com/colbymchenry/codegraph/main/install
 irm https://raw.githubusercontent.com/colbymchenry/codegraph/main/install.ps1 | iex
 irm https://raw.githubusercontent.com/colbymchenry/codegraph/main/install.ps1 | iex
 ```
 ```
 
 
-## Already have Node? Use npm instead (works on any version)
+Already have Node? `npm i -g @colbymchenry/codegraph` works on any version. CodeGraph bundles its own runtime — nothing to compile, no native build, works the same everywhere. The installer puts `codegraph` on your `PATH` but doesn't change your current shell — open a new terminal before the next step.
+
+## 2. Wire up your agent(s)
 
 
 ```bash
 ```bash
-npx @colbymchenry/codegraph        # zero-install, or:
-npm i -g @colbymchenry/codegraph
+codegraph install
 ```
 ```
 
 
-CodeGraph bundles its own runtime — nothing to compile, no native build, works the same everywhere. The interactive installer auto-configures your agent(s) — Claude Code, Cursor, Codex CLI, opencode, Hermes Agent, Gemini CLI, Antigravity IDE, Kiro.
+Auto-detects and configures Claude Code, Cursor, Codex CLI, opencode, Hermes Agent, Gemini CLI, Antigravity IDE, and Kiro — wiring the CodeGraph MCP server into each. This step connects your agents only; it does **not** index any code. (Shortcut: `npx @colbymchenry/codegraph` downloads and runs the installer in one go.)
 
 
-## Initialize Projects
+## 3. Initialize each project
 
 
 ```bash
 ```bash
 cd your-project
 cd your-project
-codegraph init -i
+codegraph init
 ```
 ```
 
 
-That's it — your agent will use CodeGraph tools automatically when a `.codegraph/` directory exists.
+`codegraph init` creates the local `.codegraph/` directory and builds the full graph in the same step — one command, done. Your agent will use CodeGraph tools automatically when a `.codegraph/` directory exists.
 
 
 Next: build [Your First Graph](/codegraph/getting-started/your-first-graph/), or see the full [Installation](/codegraph/getting-started/installation/) options.
 Next: build [Your First Graph](/codegraph/getting-started/your-first-graph/), or see the full [Installation](/codegraph/getting-started/installation/) options.

+ 13 - 6
site/src/content/docs/getting-started/your-first-graph.md

@@ -3,19 +3,19 @@ title: Your First Graph
 description: Build an index and run your first queries against it.
 description: Build an index and run your first queries against it.
 ---
 ---
 
 
-Once CodeGraph is installed, building and exploring a graph takes three commands.
+Once CodeGraph is installed, building and exploring a graph takes a few commands.
 
 
 ## Index a project
 ## Index a project
 
 
 ```bash
 ```bash
 cd your-project
 cd your-project
-codegraph init -i      # initialize + index in one step
+codegraph init
 ```
 ```
 
 
-`init` creates the `.codegraph/` directory; `-i` (or `--index`) immediately builds the full index. For an existing project you can re-index any time:
+`codegraph init` creates the `.codegraph/` directory and builds the full graph in the same step — one command, done. From there a native file watcher keeps the index in sync on every change, so you rarely need to rebuild by hand. When you do want to:
 
 
 ```bash
 ```bash
-codegraph index          # full index
+codegraph index          # full re-index
 codegraph sync           # incremental update of changed files
 codegraph sync           # incremental update of changed files
 ```
 ```
 
 
@@ -29,15 +29,22 @@ This reports the node/edge/file counts, the active SQLite backend, and the journ
 
 
 ## Run a query
 ## Run a query
 
 
+Reach for `codegraph explore` first — a natural-language question or a bag of symbol names returns the relevant source plus the call paths between those symbols in a single shot (the same output the `codegraph_explore` tool gives your agent):
+
+```bash
+codegraph explore "how does login work"
+```
+
+For narrower, scriptable lookups there are focused commands:
+
 ```bash
 ```bash
 codegraph query UserService          # find symbols by name
 codegraph query UserService          # find symbols by name
 codegraph callers handleRequest      # what calls a function
 codegraph callers handleRequest      # what calls a function
 codegraph callees handleRequest      # what a function calls
 codegraph callees handleRequest      # what a function calls
 codegraph impact AuthMiddleware      # what a change would affect
 codegraph impact AuthMiddleware      # what a change would affect
-codegraph context "fix the login flow"   # build task-focused context
 ```
 ```
 
 
-Each accepts `--json` for machine-readable output. See the full [CLI reference](/codegraph/reference/cli/).
+These four each accept `--json` for machine-readable output. See the full [CLI reference](/codegraph/reference/cli/).
 
 
 ## Hand it to your agent
 ## Hand it to your agent
 
 

+ 10 - 7
site/src/content/docs/guides/framework-routes.md

@@ -8,18 +8,21 @@ CodeGraph detects web-framework routing files and emits `route` nodes linked by
 | Framework | Shapes recognized |
 | Framework | Shapes recognized |
 |---|---|
 |---|---|
 | **Django** | `path()`, `re_path()`, `url()`, `include()` in `urls.py` (CBV `.as_view()`, dotted paths) |
 | **Django** | `path()`, `re_path()`, `url()`, `include()` in `urls.py` (CBV `.as_view()`, dotted paths) |
-| **Flask** | `@app.route('/path', methods=[])`, blueprint routes |
-| **FastAPI** | `@app.get(…)`, `@router.post(…)`, all standard methods |
-| **Express** | `app.get(…)`, `router.post(…)` with middleware chains |
-| **NestJS** | `@Controller` + `@Get/@Post/…`, GraphQL resolvers, message/event patterns, WebSocket subscriptions |
+| **Flask** | `@app.route('/path', methods=[...])`, blueprint routes |
+| **FastAPI** | `@app.get(...)`, `@router.post(...)`, all standard methods |
+| **Express** | `app.get(...)`, `router.post(...)` with middleware chains |
+| **NestJS** | `@Controller` + `@Get/@Post/...`, GraphQL `@Resolver` + `@Query/@Mutation`, `@MessagePattern`/`@EventPattern`, `@SubscribeMessage` |
 | **Laravel** | `Route::get()`, `Route::resource()`, `Controller@action`, tuple syntax |
 | **Laravel** | `Route::get()`, `Route::resource()`, `Controller@action`, tuple syntax |
-| **Drupal** | `*.routing.yml` routes; `hook_*` implementations in `.module`/`.theme`/`.install`/`.inc` |
-| **Rails** | `get '/x', to: 'users#index'`, hash-rocket syntax |
+| **Drupal** | `*.routing.yml` routes (`_controller`, `_form`, entity handlers); `hook_*` implementations in `.module`/`.theme`/`.install`/`.inc` |
+| **Rails** | `get '/x', to: 'users#index'`, hash-rocket `=>` syntax |
 | **Spring** | `@GetMapping`, `@PostMapping`, `@RequestMapping` on methods |
 | **Spring** | `@GetMapping`, `@PostMapping`, `@RequestMapping` on methods |
-| **Gin / chi / gorilla / mux** | `r.GET(…)`, `router.HandleFunc(…)` |
+| **Play** | `GET`/`POST`/... verb routes in `conf/routes` → `Controller.method` actions (Scala + Java) |
+| **Gin / chi / gorilla / mux** | `r.GET(...)`, `router.HandleFunc(...)` |
 | **Axum / actix / Rocket** | `.route("/x", get(handler))` |
 | **Axum / actix / Rocket** | `.route("/x", get(handler))` |
 | **ASP.NET** | `[HttpGet("/x")]` attributes on action methods |
 | **ASP.NET** | `[HttpGet("/x")]` attributes on action methods |
 | **Vapor** | `app.get("x", use: handler)` |
 | **Vapor** | `app.get("x", use: handler)` |
 | **React Router** / **SvelteKit** | Route component nodes |
 | **React Router** / **SvelteKit** | Route component nodes |
+| **Vue Router** / **Nuxt** | `pages/` file-based routes, `server/api/` endpoints, route middleware |
+| **Astro** | `src/pages/` file-based routes (`.astro` pages + `.ts` endpoints, `[param]`/`[...rest]` syntax) |
 
 
 Route resolution is automatic — there's nothing to configure. If a framework file is recognized, its routes appear in the graph after the next index or sync.
 Route resolution is automatic — there's nothing to configure. If a framework file is recognized, its routes appear in the graph after the next index or sync.

+ 3 - 3
site/src/content/docs/guides/indexing.md

@@ -7,10 +7,10 @@ description: Full index, incremental sync, and the file watcher.
 
 
 ```bash
 ```bash
 cd your-project
 cd your-project
-codegraph init -i      # initialize + full index
+codegraph init      # creates .codegraph/ and builds the full graph — one step
 ```
 ```
 
 
-`init` creates `.codegraph/`; `-i`/`--index` builds the index immediately. To initialize without indexing, drop the flag and run `codegraph index` later.
+`codegraph init` creates the local `.codegraph/` directory and builds the full graph in the same step — one command, done. There's no separate index step to run afterward, and from here the graph [stays fresh automatically](#stay-fresh-automatically).
 
 
 ## Full vs. incremental
 ## Full vs. incremental
 
 
@@ -20,7 +20,7 @@ codegraph index --force   # re-index from scratch
 codegraph sync            # incremental — only changed files
 codegraph sync            # incremental — only changed files
 ```
 ```
 
 
-`sync` is fast because it only reparses what changed. Use it after a branch switch or a batch of edits.
+`sync` is fast because it only reparses what changed — it's what the file watcher runs for you on every edit (see [Stay fresh automatically](#stay-fresh-automatically)). You rarely need to run it by hand.
 
 
 ## Stay fresh automatically
 ## Stay fresh automatically
 
 

+ 24 - 0
site/src/content/docs/reference/api.md

@@ -43,3 +43,27 @@ cg.close();
 | `buildContext(task, opts)` | Markdown / JSON context for AI |
 | `buildContext(task, opts)` | Markdown / JSON context for AI |
 | `watch()` / `unwatch()` | Start / stop the file watcher |
 | `watch()` / `unwatch()` | Start / stop the file watcher |
 | `close()` | Close the database connection |
 | `close()` | Close the database connection |
+
+CommonJS works too — `const { CodeGraph } = require('@colbymchenry/codegraph');`.
+
+## Lower-level building blocks
+
+The same entry point exports primitives for callers that drive the graph directly rather than through the `CodeGraph` facade: `DatabaseConnection`, `QueryBuilder`, `getDatabasePath`, `initGrammars` / `loadGrammarsForLanguages`, and `FileLock`.
+
+```typescript
+import {
+  CodeGraph,
+  DatabaseConnection,
+  QueryBuilder,
+  getDatabasePath,
+  initGrammars,
+  loadGrammarsForLanguages,
+  FileLock,
+} from '@colbymchenry/codegraph';
+```
+
+## Embedding requirements
+
+- **Install from npm** (`npm i @colbymchenry/codegraph`) so the matching per-platform package — which carries the compiled library — is fetched alongside the shim.
+- The API runs on **your** runtime, so it needs **Node 22.5+** for the built-in `node:sqlite` module (an Electron main process qualifies when its bundled Node is 22.5+). The CLI and MCP server are unaffected — they ship with a self-contained bundled runtime and need no Node at all.
+- TypeScript types ship with the package. Keep `@types/node` available and `skipLibCheck: true` (the common default).

+ 22 - 8
site/src/content/docs/reference/cli.md

@@ -7,21 +7,33 @@ description: Every CodeGraph command and the flags it accepts.
 codegraph                         # Run interactive installer
 codegraph                         # Run interactive installer
 codegraph install                 # Run installer (explicit)
 codegraph install                 # Run installer (explicit)
 codegraph uninstall               # Remove CodeGraph from your agents (inverse of install)
 codegraph uninstall               # Remove CodeGraph from your agents (inverse of install)
-codegraph init [path]             # Initialize in a project (--index to also index)
+codegraph init [path]             # Initialize a project + build its graph (one step)
 codegraph uninit [path]           # Remove CodeGraph from a project (--force to skip prompt)
 codegraph uninit [path]           # Remove CodeGraph from a project (--force to skip prompt)
-codegraph index [path]            # Full index (--force to re-index, --quiet for less output)
-codegraph sync [path]             # Incremental update
-codegraph status [path]           # Show statistics
+codegraph index [path]            # Full re-index from scratch (--force, --quiet, --verbose)
+codegraph sync [path]             # Incremental update (--quiet)
+codegraph status [path]           # Show statistics (--json)
+codegraph unlock [path]           # Remove a stale lock file that's blocking indexing
 codegraph query <search>          # Search symbols (--kind, --limit, --json)
 codegraph query <search>          # Search symbols (--kind, --limit, --json)
-codegraph files [path]            # Show file structure (--format, --filter, --max-depth, --json)
-codegraph context <task>          # Build context for AI (--format, --max-nodes)
+codegraph explore <query>         # Relevant symbols' source + call paths in one shot (same output as the codegraph_explore MCP tool)
+codegraph node <symbol|file>      # One symbol's source + callers, or read a file with line numbers (same output as codegraph_node)
+codegraph files [path]            # Show file structure (--format, --filter, --pattern, --max-depth, --json)
 codegraph callers <symbol>        # Find what calls a function/method (--limit, --json)
 codegraph callers <symbol>        # Find what calls a function/method (--limit, --json)
 codegraph callees <symbol>        # Find what a function/method calls (--limit, --json)
 codegraph callees <symbol>        # Find what a function/method calls (--limit, --json)
 codegraph impact <symbol>         # Analyze what code is affected by changing a symbol (--depth, --json)
 codegraph impact <symbol>         # Analyze what code is affected by changing a symbol (--depth, --json)
-codegraph affected [files...]     # Find test files affected by changes
-codegraph serve --mcp             # Start MCP server
+codegraph affected [files...]     # Find test files affected by changes (see below)
+codegraph daemon                  # Manage background daemons — pick one to stop (alias: daemons)
+codegraph telemetry [on|off]      # Show or change anonymous usage telemetry
+codegraph upgrade [version]       # Update to the latest release (--check, --force)
+codegraph version                 # Print the installed version (also -v, --version)
+codegraph help [command]          # Show help, optionally for one command
 ```
 ```
 
 
+The MCP server (`codegraph serve --mcp`) is launched automatically by your agent — you don't run it by hand. See [MCP Server](/codegraph/reference/mcp-server/).
+
+## init, index, and sync
+
+`codegraph init` creates the local `.codegraph/` directory **and** builds the full graph in one step. (The old `-i`/`--index` flag is now a no-op, accepted only so existing scripts don't break.) After that the file watcher keeps the graph current automatically — `index` (a full rebuild from scratch) and `sync` (an incremental update) are only needed when the watcher is disabled or you're scripting against the index outside an agent session.
+
 ## Query commands
 ## Query commands
 
 
 `query`, `callers`, `callees`, and `impact` all accept `--json` for machine-readable output.
 `query`, `callers`, `callees`, and `impact` all accept `--json` for machine-readable output.
@@ -32,6 +44,8 @@ codegraph callers handleRequest --json
 codegraph impact AuthMiddleware --depth 3
 codegraph impact AuthMiddleware --depth 3
 ```
 ```
 
 
+`explore` and `node` are the CLI faces of the `codegraph_explore` and `codegraph_node` MCP tools — same output — so subagents and non-MCP harnesses can reach the graph from a shell.
+
 ## affected
 ## affected
 
 
 Traces import dependencies transitively to find which test files are affected by changed source files. See [Affected Tests in CI](/codegraph/guides/affected-tests/) for options and a CI example.
 Traces import dependencies transitively to find which test files are affected by changed source files. See [Affected Tests in CI](/codegraph/guides/affected-tests/) for options and a CI example.

+ 5 - 9
site/src/content/docs/reference/integrations.md

@@ -3,7 +3,7 @@ title: Integrations
 description: Supported agents, and manual MCP setup.
 description: Supported agents, and manual MCP setup.
 ---
 ---
 
 
-The interactive installer auto-detects and configures each supported agent — wiring up the MCP server and writing its instructions file.
+The interactive installer auto-detects and configures each supported agent, wiring the CodeGraph MCP server into it. For the agents that use an instructions file (`CLAUDE.md`, `AGENTS.md`, or `GEMINI.md`), it also writes a short marker-fenced CodeGraph section into that file so subagents and non-MCP harnesses learn the `codegraph explore` command; `codegraph uninstall` removes that section.
 
 
 ## Supported agents
 ## Supported agents
 
 
@@ -40,24 +40,20 @@ Add the MCP server to `~/.claude.json`:
 }
 }
 ```
 ```
 
 
-Optionally auto-allow the read-only tools in `~/.claude/settings.json`:
+Optionally auto-allow CodeGraph's tools in `~/.claude/settings.json`:
 
 
 ```json
 ```json
 {
 {
   "permissions": {
   "permissions": {
     "allow": [
     "allow": [
-      "mcp__codegraph__codegraph_search",
-      "mcp__codegraph__codegraph_callers",
-      "mcp__codegraph__codegraph_callees",
-      "mcp__codegraph__codegraph_impact",
-      "mcp__codegraph__codegraph_node",
-      "mcp__codegraph__codegraph_status",
-      "mcp__codegraph__codegraph_files"
+      "mcp__codegraph__*"
     ]
     ]
   }
   }
 }
 }
 ```
 ```
 
 
+One wildcard auto-approves every CodeGraph tool. The server lists a single tool by default — `codegraph_explore` — but if you re-enable others via the `CODEGRAPH_MCP_TOOLS` environment variable, they're already permitted with no prompt.
+
 :::tip
 :::tip
 Cursor launches MCP subprocesses with the wrong working directory. The installer handles this for you by injecting a `--path` argument; if you wire Cursor up by hand, pass the project path explicitly.
 Cursor launches MCP subprocesses with the wrong working directory. The installer handles this for you by injecting a `--path` argument; if you wire Cursor up by hand, pass the project path explicitly.
 :::
 :::

+ 3 - 0
site/src/content/docs/reference/languages.md

@@ -18,13 +18,16 @@ Language support is automatic from the file extension — there's nothing to con
 | Ruby | `.rb` | Full support |
 | Ruby | `.rb` | Full support |
 | C | `.c`, `.h` | Full support |
 | C | `.c`, `.h` | Full support |
 | C++ | `.cpp`, `.hpp`, `.cc` | Full support |
 | C++ | `.cpp`, `.hpp`, `.cc` | Full support |
+| Objective-C | `.m`, `.mm`, `.h` | Partial support (classes, protocols, methods, `@property`, `#import`, message sends; `.mm` ObjC++ may parse incompletely) |
 | Swift | `.swift` | Full support |
 | Swift | `.swift` | Full support |
 | Kotlin | `.kt`, `.kts` | Full support |
 | Kotlin | `.kt`, `.kts` | Full support |
 | Scala | `.scala`, `.sc` | Full support (classes, traits, methods, type aliases, Scala 3 enums) |
 | Scala | `.scala`, `.sc` | Full support (classes, traits, methods, type aliases, Scala 3 enums) |
 | Dart | `.dart` | Full support |
 | Dart | `.dart` | Full support |
 | Svelte | `.svelte` | Full support (script extraction, Svelte 5 runes, SvelteKit routes) |
 | Svelte | `.svelte` | Full support (script extraction, Svelte 5 runes, SvelteKit routes) |
 | Vue | `.vue` | Full support (script + script-setup, Nuxt page/API/middleware routes) |
 | Vue | `.vue` | Full support (script + script-setup, Nuxt page/API/middleware routes) |
+| Astro | `.astro` | Full support (frontmatter + script extraction, template component/call references, `src/pages/` routes) |
 | Liquid | `.liquid` | Full support |
 | Liquid | `.liquid` | Full support |
 | Pascal / Delphi | `.pas`, `.dpr`, `.dpk`, `.lpr` | Full support (classes, records, interfaces, enums, DFM/FMX forms) |
 | Pascal / Delphi | `.pas`, `.dpr`, `.dpk`, `.lpr` | Full support (classes, records, interfaces, enums, DFM/FMX forms) |
 | Lua | `.lua` | Full support (functions, methods, locals, `require` imports, call edges) |
 | Lua | `.lua` | Full support (functions, methods, locals, `require` imports, call edges) |
+| R | `.R`, `.r` | Full support (functions, S4/R5/R6 classes with methods, `library`/`require` imports, `source()` file references, call edges) |
 | Luau | `.luau` | Full support (Lua, plus typed signatures, `type` aliases, Roblox `require`) |
 | Luau | `.luau` | Full support (Lua, plus typed signatures, `type` aliases, Roblox `require`) |

+ 23 - 8
site/src/content/docs/reference/mcp-server.md

@@ -3,29 +3,44 @@ title: MCP Server
 description: The tools CodeGraph exposes to AI agents over MCP.
 description: The tools CodeGraph exposes to AI agents over MCP.
 ---
 ---
 
 
-CodeGraph runs as a [Model Context Protocol](https://modelcontextprotocol.io/) server. Start it with:
+CodeGraph runs as a [Model Context Protocol](https://modelcontextprotocol.io/) server. Agents configured by the installer launch it automatically — you don't start it by hand:
 
 
 ```bash
 ```bash
 codegraph serve --mcp
 codegraph serve --mcp
 ```
 ```
 
 
-Agents configured by the installer launch this automatically. When a `.codegraph/` index exists, the agent uses the tools below.
+When a `.codegraph/` index exists, the agent gets the tool below. In a workspace with **no** index, the server announces itself inactive and lists **no** tools — the agent works normally with its built-in tools, and indexing stays your decision.
 
 
-## Tools
+## One tool by default: `codegraph_explore`
+
+By default the server exposes a **single tool**, `codegraph_explore`. It's Read-equivalent: give it a natural-language question or a bag of symbol and file names, and it returns the **verbatim, line-numbered source** of the relevant symbols grouped by file — the same shape the `Read` tool gives you — plus the call paths between them (including dynamic-dispatch hops like callbacks, React re-renders, and JSX children that grep can't follow) and a blast-radius summary of what depends on them. One call usually answers the whole question.
+
+Exposing a single strong tool is deliberate. Measured agent behavior showed that one well-aimed tool steers agents to a direct answer better than a menu of narrower ones — fewer mis-picks — and agents reach for it both when answering questions and while editing code.
+
+## The other tools
+
+Seven more tools exist and stay fully functional, but are **unlisted by default** — everything they return already arrives inline on a `codegraph_explore` response (its blast-radius section, the relationship map, a symbol's body and its callee list):
 
 
 | Tool | Purpose |
 | Tool | Purpose |
 |---|---|
 |---|---|
-| `codegraph_search` | Find symbols by name across the codebase |
+| `codegraph_node` | One symbol's source + caller/callee trail, or a whole file read with line numbers (Read-parity). Returns every overload's body for an ambiguous name. |
+| `codegraph_search` | Find symbols by name across the codebase (locations only) |
 | `codegraph_callers` | Find what calls a function |
 | `codegraph_callers` | Find what calls a function |
 | `codegraph_callees` | Find what a function calls |
 | `codegraph_callees` | Find what a function calls |
 | `codegraph_impact` | Analyze what code is affected by changing a symbol |
 | `codegraph_impact` | Analyze what code is affected by changing a symbol |
-| `codegraph_node` | Get details about a specific symbol (optionally with source code) |
-| `codegraph_explore` | Return source for several related symbols grouped by file, plus a relationship map, in one call |
 | `codegraph_files` | Get the indexed file structure (faster than filesystem scanning) |
 | `codegraph_files` | Get the indexed file structure (faster than filesystem scanning) |
 | `codegraph_status` | Check index health and statistics |
 | `codegraph_status` | Check index health and statistics |
 
 
+Re-enable any of them with the `CODEGRAPH_MCP_TOOLS` environment variable — a comma-separated allowlist of short names that replaces the default:
+
+```bash
+CODEGRAPH_MCP_TOOLS=explore,node,search,callers
+```
+
+Each also has a CLI equivalent (`codegraph node` / `query` / `callers` / `callees` / `impact` / `files` / `status`) for scripts and non-MCP harnesses.
+
 ## How agents should use it
 ## How agents should use it
 
 
-CodeGraph *is* the pre-built search index. For "how does X work?", architecture, trace, or where-is-X questions, an agent should answer in a handful of CodeGraph calls and stop — typically with **zero file reads** — rather than re-deriving the answer with `grep` + `Read`. A direct CodeGraph answer is a handful of calls; a grep/read exploration is dozens.
+CodeGraph *is* the pre-built search index. For "how does X work?", architecture, a flow ("how does X reach Y"), or where-is-X questions — and while editing code — an agent should answer with `codegraph_explore` and stop, typically with **zero file reads**, rather than re-deriving the answer with `grep` + `Read`. A direct CodeGraph answer is one to a few calls; a grep/read exploration is dozens.
 
 
-The installer writes this guidance into each agent's instructions file automatically.
+The MCP server delivers this guidance to the main agent automatically, in the MCP `initialize` response. Because subagents and non-MCP harnesses never see that response, the installer also writes a short marker-fenced section into each agent's instructions file pointing at the `codegraph explore` CLI equivalent.

+ 6 - 2
site/src/content/docs/troubleshooting.md

@@ -20,8 +20,12 @@ Current builds shouldn't: CodeGraph bundles its own Node runtime and uses Node's
 
 
 ## MCP server not connecting
 ## MCP server not connecting
 
 
-Ensure the project is initialized/indexed, verify the path in your MCP config, and check that `codegraph serve --mcp` works from the command line.
+Your agent starts the server itself, so you don't launch it by hand. Make sure the project is initialized and indexed (`codegraph status`) and that the path in your MCP config is correct. If it still won't connect, re-run `codegraph install` to rewrite the config.
 
 
 ## Missing symbols
 ## Missing symbols
 
 
-The MCP server auto-syncs on save (wait a couple of seconds). Run `codegraph sync` manually if needed. Check that the file's language is [supported](/codegraph/reference/languages/) and isn't excluded by `.gitignore`.
+The MCP server auto-syncs on save (wait a couple of seconds). Run `codegraph sync` manually if needed. Check that the file's language is [supported](/codegraph/reference/languages/) and isn't inside a `.gitignore`d or default-excluded directory (e.g. `node_modules`, `dist`).
+
+## Sharing one checkout between Windows and WSL
+
+Don't point both at the same `.codegraph/`: the background-server lock and the SQLite index are tied to the OS that wrote them, and SQLite locking across the WSL2/Windows filesystem boundary is unreliable. Give each side its own index in the same tree by setting `CODEGRAPH_DIR` to a distinct name on one of them — e.g. `CODEGRAPH_DIR=.codegraph-win` on Windows, leaving WSL on the default `.codegraph`. CodeGraph skips any sibling `.codegraph-*` directory when indexing and watching, so the two never trip over each other.

+ 75 - 10
src/extraction/index.ts

@@ -32,6 +32,18 @@ import type { ResolutionContext } from '../resolution/types';
  */
  */
 const FILE_IO_BATCH_SIZE = 10;
 const FILE_IO_BATCH_SIZE = 10;
 
 
+/**
+ * How many files the `sync()` reconcile processes between cooperative yields to
+ * the event loop. The reconcile runs two O(files) loops of synchronous `fs`
+ * calls (existsSync for removals, statSync for adds/mods); on a very large repo
+ * (~100k files) an un-yielded run wedges the main thread for minutes, which both
+ * trips the liveness watchdog (it SIGKILLs a process whose loop stops turning)
+ * and blocks the first MCP tool call behind the catch-up gate (issue #905).
+ * Yielding every N files keeps the socket, the watchdog heartbeat, and any
+ * concurrent read query responsive while the reconcile runs.
+ */
+const SYNC_RECONCILE_YIELD_INTERVAL = 1000;
+
 // PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
 // PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
 
 
 /**
 /**
@@ -256,6 +268,21 @@ function defaultsOnlyIgnore(): Ignore {
   return ignore().add(DEFAULT_IGNORE_PATTERNS);
   return ignore().add(DEFAULT_IGNORE_PATTERNS);
 }
 }
 
 
+/**
+ * `git ls-files --directory` collapses a wholly-untracked/ignored directory into
+ * one entry — and when the command's own cwd is such a directory (the indexed
+ * root is itself a git-ignored subdir of an enclosing repo), git emits the
+ * literal `./` meaning "this entire directory". That sentinel is not a real
+ * nested path: feeding it to the `ignore` matcher throws ("path should be a
+ * `path.relative()`d string, but got "./""), which used to abort `buildScopeIgnore`
+ * and so break the MCP daemon's watcher/auto-sync on connect; and joining it back
+ * onto `repoDir` would just re-point at the cwd. Drop it wherever we consume
+ * `--directory` output. (#936)
+ */
+function isWholeCwdEntry(entry: string): boolean {
+  return entry === './' || entry === '.' || entry === '';
+}
+
 /**
 /**
  * List the gitignored DIRECTORIES of a repo (collapsed, trailing-slash form),
  * List the gitignored DIRECTORIES of a repo (collapsed, trailing-slash form),
  * relative to `repoDir`. These are invisible to every other `git ls-files` /
  * relative to `repoDir`. These are invisible to every other `git ls-files` /
@@ -270,7 +297,7 @@ function listIgnoredDirs(repoDir: string): string[] {
       ['ls-files', '-z', '-o', '-i', '--exclude-standard', '--directory'],
       ['ls-files', '-z', '-o', '-i', '--exclude-standard', '--directory'],
       { cwd: repoDir, encoding: 'utf-8' as const, timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] as ['pipe', 'pipe', 'pipe'], windowsHide: true }
       { cwd: repoDir, encoding: 'utf-8' as const, timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] as ['pipe', 'pipe', 'pipe'], windowsHide: true }
     );
     );
-    return out.split('\0').filter((e) => e.endsWith('/'));
+    return out.split('\0').filter((e) => e.endsWith('/') && !isWholeCwdEntry(e));
   } catch {
   } catch {
     return [];
     return [];
   }
   }
@@ -289,8 +316,10 @@ const EMBEDDED_REPO_SEARCH_ENTRIES = 2000;
  * - A `.git` **file** is a pointer (`gitdir: …`). A git **worktree** points into
  * - A `.git` **file** is a pointer (`gitdir: …`). A git **worktree** points into
  *   the host repo's own `.git/worktrees/<name>`, so it is a second working view
  *   the host repo's own `.git/worktrees/<name>`, so it is a second working view
  *   of a repo CodeGraph already indexes — indexing it just duplicates the whole
  *   of a repo CodeGraph already indexes — indexing it just duplicates the whole
- *   graph N times; skip it (#848). A **submodule** points into `.git/modules/`
- *   and is distinct code, so index it as before.
+ *   graph N times; skip it (#848). A **submodule worktree** points into
+ *   `.git/modules/<module>/worktrees/<name>` — same duplication, so skip it too
+ *   (#945). A **submodule** checkout points into `.git/modules/<module>` (no
+ *   `worktrees/` segment) and is distinct code, so index it as before.
  *
  *
  * Returns `'none'` when there is no `.git` entry here.
  * Returns `'none'` when there is no `.git` entry here.
  */
  */
@@ -305,9 +334,12 @@ function classifyGitDir(absDir: string): 'embedded' | 'worktree' | 'none' {
   if (!st.isFile()) return 'none';
   if (!st.isFile()) return 'none';
   try {
   try {
     const gitdir = fs.readFileSync(path.join(absDir, '.git'), 'utf8').match(/^gitdir:\s*(.+)$/m)?.[1]?.trim();
     const gitdir = fs.readFileSync(path.join(absDir, '.git'), 'utf8').match(/^gitdir:\s*(.+)$/m)?.[1]?.trim();
-    // A linked worktree's gitdir lives under some repo's `.git/worktrees/`.
+    // A worktree's gitdir lives under some repo's `.git/worktrees/<name>` —
+    // either the top-level repo's (`.git/worktrees/`) or, for a worktree of a
+    // submodule, that submodule's gitdir (`.git/modules/<module>/worktrees/`).
+    // The optional `modules/<module>` segment covers the submodule case (#945).
     // Match both separators so a Windows-style pointer is recognized too.
     // Match both separators so a Windows-style pointer is recognized too.
-    if (gitdir && /(^|[\\/])\.git[\\/]worktrees[\\/]/.test(gitdir)) return 'worktree';
+    if (gitdir && /(^|[\\/])\.git[\\/](modules[\\/][^\\/]+[\\/])?worktrees[\\/]/.test(gitdir)) return 'worktree';
   } catch {
   } catch {
     // Unreadable `.git` pointer — fall back to the prior "index it" behavior.
     // Unreadable `.git` pointer — fall back to the prior "index it" behavior.
   }
   }
@@ -434,7 +466,7 @@ export function discoverEmbeddedRepoRoots(rootDir: string): string[] {
         { cwd: repoAbs, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true }
         { cwd: repoAbs, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true }
       );
       );
       for (const e of o.split('\0')) {
       for (const e of o.split('\0')) {
-        if (e.endsWith('/') && !defaults.ignores(e)) {
+        if (e.endsWith('/') && !isWholeCwdEntry(e) && !defaults.ignores(e)) {
           candidates.push(...findNestedGitRepos(path.join(repoAbs, e), e));
           candidates.push(...findNestedGitRepos(path.join(repoAbs, e), e));
         }
         }
       }
       }
@@ -619,6 +651,18 @@ function collectGitStatus(repoDir: string, prefix: string, out: GitChanges): voi
     { cwd: repoDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true }
     { cwd: repoDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true }
   );
   );
 
 
+  // This repo's own ignore rules — built-in defaults (#407) plus its .gitignore.
+  // Change detection must exclude the SAME files the full index does, but git
+  // status hides neither: it ignores nothing for *tracked* paths, and the
+  // built-in defaults aren't gitignore at all. Without this filter a committed
+  // vendor/ dir, or a tracked file under a .gitignored dir, surfaces here as a
+  // change — so `codegraph status` (which reads getChangedFiles) reports a
+  // pending edit the full index never tracks and `sync` never clears. Matching
+  // repo-relative `rel` at each recursion level mirrors getGitVisibleFiles'
+  // ScopeIgnore: every embedded repo is judged by ITS OWN rules, never the
+  // parent's. (#766)
+  const ig = buildDefaultIgnore(repoDir);
+
   const untrackedDirs: string[] = [];
   const untrackedDirs: string[] = [];
   for (const line of output.split('\n')) {
   for (const line of output.split('\n')) {
     if (line.length < 4) continue; // Minimum: "XY file"
     if (line.length < 4) continue; // Minimum: "XY file"
@@ -634,13 +678,22 @@ function collectGitStatus(repoDir: string, prefix: string, out: GitChanges): voi
     }
     }
 
 
     const filePath = normalizePath(prefix + rel);
     const filePath = normalizePath(prefix + rel);
-    // Skip non-source files (git status already omits .gitignored paths).
     if (!isSourceFile(filePath)) continue;
     if (!isSourceFile(filePath)) continue;
 
 
+    if (statusCode.includes('D')) {
+      // Deletions stay unfiltered: getChangedFiles acts on one only when the
+      // path is already tracked in the DB, where removal is always correct — and
+      // that lets a newly-excluded dir's stale rows clean themselves up. (#766)
+      out.deleted.push(filePath);
+      continue;
+    }
+
+    // Added (`??`) / modified files inside an excluded dir must not enter the
+    // index — match against the repo-relative path, same as the full scan. (#766)
+    if (ig.ignores(rel)) continue;
+
     if (statusCode === '??') {
     if (statusCode === '??') {
       out.added.push(filePath);
       out.added.push(filePath);
-    } else if (statusCode.includes('D')) {
-      out.deleted.push(filePath);
     } else {
     } else {
       // M, MM, AM, A (staged), etc. — treat as modified
       // M, MM, AM, A (staged), etc. — treat as modified
       out.modified.push(filePath);
       out.modified.push(filePath);
@@ -1733,7 +1786,7 @@ export class ExtractionOrchestrator {
     // whether or not the project uses git, and crucially also catches committed
     // whether or not the project uses git, and crucially also catches committed
     // changes from `git pull`/`checkout`/`merge`/`rebase` — which `git status`
     // changes from `git pull`/`checkout`/`merge`/`rebase` — which `git status`
     // cannot see, because the working tree is clean afterward.
     // cannot see, because the working tree is clean afterward.
-    const currentFiles = scanDirectory(this.rootDir);
+    const currentFiles = await scanDirectoryAsync(this.rootDir);
     filesChecked = currentFiles.length;
     filesChecked = currentFiles.length;
     const currentSet = new Set(currentFiles);
     const currentSet = new Set(currentFiles);
 
 
@@ -1746,15 +1799,27 @@ export class ExtractionOrchestrator {
     // Removals: tracked in the DB but no longer a present source file. Check the
     // Removals: tracked in the DB but no longer a present source file. Check the
     // filesystem directly — `scanDirectory` (via `git ls-files`) still lists a
     // filesystem directly — `scanDirectory` (via `git ls-files`) still lists a
     // file deleted from disk but not yet staged, so set membership alone misses it.
     // file deleted from disk but not yet staged, so set membership alone misses it.
+    // `reconcileChecks` drives the cooperative yield shared with the adds/mods loop
+    // below (see SYNC_RECONCILE_YIELD_INTERVAL / issue #905).
+    let reconcileChecks = 0;
     for (const tracked of trackedFiles) {
     for (const tracked of trackedFiles) {
       if (!currentSet.has(tracked.path) || !fs.existsSync(path.join(this.rootDir, tracked.path))) {
       if (!currentSet.has(tracked.path) || !fs.existsSync(path.join(this.rootDir, tracked.path))) {
         this.queries.deleteFile(tracked.path);
         this.queries.deleteFile(tracked.path);
         filesRemoved++;
         filesRemoved++;
       }
       }
+      if (++reconcileChecks % SYNC_RECONCILE_YIELD_INTERVAL === 0) {
+        await new Promise<void>((resolve) => setImmediate(resolve));
+      }
     }
     }
 
 
     // Adds / modifications.
     // Adds / modifications.
     for (const filePath of currentFiles) {
     for (const filePath of currentFiles) {
+      // Same cooperative yield as the removals loop — this is the other O(files)
+      // synchronous-stat loop that wedges the main thread on a large repo (#905).
+      // Yield at the top of the body so the `continue` fast-paths below still hit it.
+      if (++reconcileChecks % SYNC_RECONCILE_YIELD_INTERVAL === 0) {
+        await new Promise<void>((resolve) => setImmediate(resolve));
+      }
       const fullPath = path.join(this.rootDir, filePath);
       const fullPath = path.join(this.rootDir, filePath);
       const tracked = trackedMap.get(filePath);
       const tracked = trackedMap.get(filePath);
 
 

+ 39 - 2
src/extraction/languages/c-cpp.ts

@@ -148,6 +148,40 @@ export const cExtractor: LanguageExtractor = {
   },
   },
 };
 };
 
 
+/**
+ * Recognize the phantom `function_definition` tree-sitter produces for a
+ * macro-annotated class/struct such as `class MACRO Name { … }` or
+ * `class MACRO Name : public Base { … }` (#946).
+ *
+ * Because `MACRO` is not known to be a macro, `class MACRO` is parsed as an
+ * elaborated type specifier (a bodyless `class_specifier`/`struct_specifier`),
+ * `Name` as the declarator and `{ … }` as a function body — yielding a
+ * "function" named after the class that spans the whole class body.
+ *
+ * The node is reported as a misparse only when both of these hold:
+ *  - its `type` field is a class/struct specifier WITHOUT a
+ *    `field_declaration_list` (so a real inline-defined return type such as
+ *    `struct P { int x; } makeP() { … }` is left alone); and
+ *  - its `declarator` field is not a `function_declarator` (so a genuine
+ *    function with an elaborated return type, `class Foo f() { … }`, is left
+ *    alone — a missing declarator also counts as "not a function").
+ *
+ * The caller drops such nodes instead of indexing a misleading whole-body
+ * `function`, mirroring how macro-prefixed C prototypes are handled.
+ *
+ * @param node The candidate `function_definition` node.
+ * @returns `true` when the node is the macro-induced misparse, else `false`.
+ */
+function isMacroMisparsedTypeDecl(node: SyntaxNode): boolean {
+  const spec = getChildByField(node, 'type');
+  if (!spec) return false;
+
+  const isClassOrStruct = spec.type === 'class_specifier' || spec.type === 'struct_specifier';
+  if (!isClassOrStruct) return false;
+
+  // A field list means the specifier really defines a type inline.
+  const definesBody = spec.namedChildren.some(
+    (child: SyntaxNode) => child.type === 'field_declaration_list'
+  );
+  if (definesBody) return false;
+
+  // Real function definitions always carry a function_declarator.
+  return getChildByField(node, 'declarator')?.type !== 'function_declarator';
+}
+
 export const cppExtractor: LanguageExtractor = {
 export const cppExtractor: LanguageExtractor = {
   functionTypes: ['function_definition'],
   functionTypes: ['function_definition'],
   classTypes: ['class_specifier'],
   classTypes: ['class_specifier'],
@@ -192,14 +226,17 @@ export const cppExtractor: LanguageExtractor = {
     }
     }
     return undefined;
     return undefined;
   },
   },
-  isMisparsedFunction: (name) => {
+  isMisparsedFunction: (name, node) => {
     // C++ macros like NLOHMANN_JSON_NAMESPACE_BEGIN cause tree-sitter to misparse
     // C++ macros like NLOHMANN_JSON_NAMESPACE_BEGIN cause tree-sitter to misparse
     // namespace blocks as function_definitions (e.g. name = "namespace detail").
     // namespace blocks as function_definitions (e.g. name = "namespace detail").
     // Also filter C++ keywords that tree-sitter occasionally misinterprets as
     // Also filter C++ keywords that tree-sitter occasionally misinterprets as
     // function/method names (e.g. switch statements inside macro-confused scopes).
     // function/method names (e.g. switch statements inside macro-confused scopes).
     if (name.startsWith('namespace')) return true;
     if (name.startsWith('namespace')) return true;
     const cppKeywords = ['switch', 'if', 'for', 'while', 'do', 'case', 'return'];
     const cppKeywords = ['switch', 'if', 'for', 'while', 'do', 'case', 'return'];
-    return cppKeywords.includes(name);
+    if (cppKeywords.includes(name)) return true;
+    // `class MACRO Name : public Base { … }` misparses to a function_definition
+    // named after the class — drop that phantom (#946).
+    return isMacroMisparsedTypeDecl(node);
   },
   },
   extractImport: (node, source) => {
   extractImport: (node, source) => {
     const importText = source.substring(node.startIndex, node.endIndex).trim();
     const importText = source.substring(node.startIndex, node.endIndex).trim();

+ 225 - 10
src/extraction/languages/java.ts

@@ -1,6 +1,6 @@
 import type { Node as SyntaxNode } from 'web-tree-sitter';
 import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
-import type { LanguageExtractor } from '../tree-sitter-types';
+import type { ExtractorContext, LanguageExtractor } from '../tree-sitter-types';
 
 
 /**
 /**
  * Tree-sitter-java node types for a method's `type` (return) field that can
  * Tree-sitter-java node types for a method's `type` (return) field that can
@@ -15,18 +15,16 @@ const JAVA_NON_CLASS_RETURN_NODES = new Set([
 ]);
 ]);
 
 
 /**
 /**
- * A Java method's declared return type, normalized to the bare class name a
- * chained `Foo.getInstance().bar()` could be called on (the #645/#608 mechanism).
- * Reads the `type` field: primitives/void/arrays yield undefined (no class to
- * chain on), `List<Foo>` is unwrapped to its base type `List`, and a dotted
- * package/outer-class qualifier (`java.util.List`) is stripped to the simple
- * name. Constructors have no `type` field → undefined.
+ * Normalize a Java type node to the bare class name a chained
+ * `foo.getThing().bar()` could be called on (the #645/#608 mechanism):
+ * primitives/void/arrays yield undefined (no class to chain on), `List<Foo>`
+ * is unwrapped to its base `List`, and a dotted package/outer-class qualifier
+ * (`java.util.List`) is stripped to the simple name.
  */
  */
-function extractJavaReturnType(node: SyntaxNode, source: string): string | undefined {
-  const typeNode = getChildByField(node, 'type');
+function normalizeJavaType(typeNode: SyntaxNode | null, source: string): string | undefined {
   if (!typeNode) return undefined;
   if (!typeNode) return undefined;
   if (JAVA_NON_CLASS_RETURN_NODES.has(typeNode.type)) return undefined;
   if (JAVA_NON_CLASS_RETURN_NODES.has(typeNode.type)) return undefined;
-  // An array return (`Foo[]`) isn't a receiver you call instance methods on.
+  // An array (`Foo[]`) isn't a receiver you call instance methods on.
   if (typeNode.type === 'array_type') return undefined;
   if (typeNode.type === 'array_type') return undefined;
   // Strip type arguments (`List<Foo>` → `List`) — the chain resolves on the base.
   // Strip type arguments (`List<Foo>` → `List`) — the chain resolves on the base.
   const raw = getNodeText(typeNode, source).trim().replace(/<[^>]*>/g, '');
   const raw = getNodeText(typeNode, source).trim().replace(/<[^>]*>/g, '');
@@ -36,6 +34,222 @@ function extractJavaReturnType(node: SyntaxNode, source: string): string | undef
   return last;
   return last;
 }
 }
 
 
+/**
+ * Declared return type of a Java method, normalized by `normalizeJavaType`.
+ *
+ * @param node   The method declaration node; its `type` field is read.
+ * @param source Full source text the node was parsed from.
+ * @returns The normalized type name, or `undefined` when the node has no
+ *          `type` field (constructors) or the type normalizes to nothing.
+ */
+function extractJavaReturnType(node: SyntaxNode, source: string): string | undefined {
+  const declaredType = getChildByField(node, 'type');
+  return normalizeJavaType(declaredType, source);
+}
+
+// ---------------------------------------------------------------------------
+// Lombok-generated member synthesis (#912)
+// ---------------------------------------------------------------------------
+// Lombok generates methods at compile time, so they never appear in the source
+// AST and static extraction misses them — `bean.getX()`, `bean.setX()`,
+// `Bean.builder()`, and `log.info(...)` calls then resolve to nothing and call
+// chains break silently. We synthesize the mechanical, well-documented ones.
+
+/**
+ * Simple names of the Lombok logging annotations. Any of them on a class makes
+ * the synthesizer emit a field named `log` (the default logger field name).
+ */
+const LOMBOK_LOG_ANNOTATIONS = new Set<string>([
+  'Slf4j',
+  'Log4j',
+  'Log4j2',
+  'Log',
+  'CommonsLog',
+  'JBossLog',
+  'Flogger',
+  'XSlf4j',
+  'CustomLog',
+]);
+
+/**
+ * Collect the simple names of the annotations attached to a declaration.
+ *
+ * Looks at the node's `modifiers` child and, for each `marker_annotation` /
+ * `annotation` inside it, keeps the last dot-separated segment of the
+ * annotation's `name` field (`@lombok.Getter` → `Getter`).
+ *
+ * @param node A declaration node (class or field).
+ * @returns The set of simple annotation names; empty when the node has no
+ *          `modifiers` child.
+ */
+function lombokAnnotationNames(node: SyntaxNode): Set<string> {
+  const found = new Set<string>();
+  const modifierList = node.namedChildren.find((c: SyntaxNode) => c.type === 'modifiers');
+  if (!modifierList) return found;
+
+  for (const entry of modifierList.namedChildren) {
+    const isAnnotation = entry.type === 'marker_annotation' || entry.type === 'annotation';
+    if (!isAnnotation) continue;
+
+    const nameNode = getChildByField(entry, 'name');
+    if (!nameNode) continue;
+
+    // Drop any package / outer-class qualifier, keeping the trailing segment.
+    const simpleName = nameNode.text.trim().split('.').pop();
+    if (simpleName) found.add(simpleName);
+  }
+  return found;
+}
+
+/**
+ * Raw text of a declaration's `modifiers` child, or `''` when it has none.
+ * Keyword modifiers (`static`, `final`, …) are anonymous nodes, so callers
+ * match them against this text rather than walking named children.
+ */
+function modifierTextOf(node: SyntaxNode): string {
+  for (const child of node.namedChildren) {
+    if (child.type === 'modifiers') return child.text;
+  }
+  return '';
+}
+
+/** Upper-case the first character of `name`; an empty string is returned unchanged. */
+function capitalizeJava(name: string): string {
+  if (!name) return name;
+  return name.charAt(0).toUpperCase() + name.substring(1);
+}
+
+/**
+ * Name of the Lombok-generated getter for a field.
+ *
+ * @param fieldName          The field's declared name.
+ * @param isBooleanPrimitive Whether the field's type is primitive `boolean`.
+ * @returns `getX` for ordinary fields; for a primitive boolean, `isX` — or the
+ *          field name itself when it already reads `is` + upper-case letter.
+ */
+function lombokGetterName(fieldName: string, isBooleanPrimitive: boolean): string {
+  if (!isBooleanPrimitive) {
+    return 'get' + capitalizeJava(fieldName);
+  }
+  const alreadyIsPrefixed = /^is[A-Z]/.test(fieldName);
+  return alreadyIsPrefixed ? fieldName : 'is' + capitalizeJava(fieldName);
+}
+
+/**
+ * Name of the Lombok-generated setter for a field.
+ *
+ * @param fieldName          The field's declared name.
+ * @param isBooleanPrimitive Whether the field's type is primitive `boolean`.
+ * @returns `setX`; a primitive boolean named `isFoo` drops the `is` and
+ *          yields `setFoo`.
+ */
+function lombokSetterName(fieldName: string, isBooleanPrimitive: boolean): string {
+  const dropIsPrefix = isBooleanPrimitive && /^is[A-Z]/.test(fieldName);
+  const stem = dropIsPrefix ? fieldName.slice(2) : fieldName;
+  return 'set' + capitalizeJava(stem);
+}
+
+/**
+ * Synthesize the members Lombok generates at compile time. Covers the common,
+ * mechanical annotations:
+ *
+ *   @Getter / @Setter (class- or field-level)  → getX()/isX(), setX()
+ *   @Data                                       → getters + setters (non-final)
+ *                                                 + equals/hashCode/toString
+ *   @Value                                      → getters + equals/hashCode/toString (immutable, no setters)
+ *   @Builder / @SuperBuilder                    → static builder()
+ *   @ToString / @EqualsAndHashCode              → those methods
+ *   @Slf4j and the other @Log* annotations      → the `log` field
+ *
+ * Each node is anchored on the field's (or class's) name token — a leaf, so it
+ * pulls in no spurious value-reference scope — carries a `lombok` decorator and
+ * a docstring naming the generating annotation, so it reads as generated rather
+ * than hand-written. Deliberately NOT synthesized: constructors (`new X()`
+ * already links to the class via `instantiates`, and overloaded
+ * @NoArgs/@AllArgs/@RequiredArgs ctors share a name → would collide on a
+ * synthetic node id), the fluent builder setters, and `@Accessors(fluent=true)`
+ * naming. A member the source already declares is never overridden.
+ *
+ * @param classNode The class declaration node being finished; its `body`
+ *                  field supplies the direct `field_declaration` children.
+ * @param ctx       Extraction context. The class is expected to be the top of
+ *                  `ctx.nodeStack`; new nodes are created via `ctx.createNode`.
+ */
+function synthesizeLombokMembers(classNode: SyntaxNode, ctx: ExtractorContext): void {
+  const classAnns = lombokAnnotationNames(classNode);
+  const classGetter = classAnns.has('Getter');
+  const classSetter = classAnns.has('Setter');
+  const isData = classAnns.has('Data');
+  const isValue = classAnns.has('Value');
+  const hasBuilder = classAnns.has('Builder') || classAnns.has('SuperBuilder');
+  const hasToString = isData || isValue || classAnns.has('ToString');
+  const hasEquals = isData || isValue || classAnns.has('EqualsAndHashCode');
+  const logAnn = [...classAnns].find((a) => LOMBOK_LOG_ANNOTATIONS.has(a));
+
+  const body = getChildByField(classNode, 'body');
+  if (!body) return;
+  const fields = body.namedChildren.filter((c: SyntaxNode) => c.type === 'field_declaration');
+
+  // Leave immediately when nothing Lombok is present, so a non-Lombok class
+  // pays nothing beyond one scan of its direct field declarations (and an
+  // annotated class skips even that — this hook runs for every Java class).
+  //
+  // Without a class-level annotation the only thing that can still produce a
+  // member is a field-level @Getter/@Setter, so the gate checks for exactly
+  // those. Testing for "any annotation at all" let fields carrying unrelated
+  // annotations (@Autowired, @Column, …) defeat the early exit and pay for the
+  // full member scan below without ever emitting a node.
+  const classHasLombok =
+    classGetter || classSetter || isData || isValue || hasBuilder || hasToString || hasEquals || !!logAnn;
+  const hasAccessorAnnotation = (f: SyntaxNode): boolean => {
+    const anns = lombokAnnotationNames(f);
+    return anns.has('Getter') || anns.has('Setter');
+  };
+  if (!classHasLombok && !fields.some(hasAccessorAnnotation)) {
+    return;
+  }
+
+  // Members already declared directly in this class. Lombok never overrides an
+  // explicit member, so we skip a name the source already has. Methods and
+  // fields are tracked separately: they're distinct namespaces in Java (a
+  // boolean field `isRunning` and its generated getter `isRunning()` coexist),
+  // and the node id is keyed by kind so they never actually collide.
+  const classId = ctx.nodeStack[ctx.nodeStack.length - 1];
+  const classRec = ctx.nodes.find((n) => n.id === classId);
+  const classQN = classRec?.qualifiedName;
+  const takenMethods = new Set<string>();
+  const takenFields = new Set<string>();
+  if (classQN) {
+    for (const n of ctx.nodes) {
+      // Direct members only: same file, qualified name is exactly `<class>::<name>`.
+      if (n.filePath === ctx.filePath && n.qualifiedName === `${classQN}::${n.name}`) {
+        if (n.kind === 'method' || n.kind === 'function') takenMethods.add(n.name);
+        else if (n.kind === 'field' || n.kind === 'variable' || n.kind === 'constant' || n.kind === 'property') {
+          takenFields.add(n.name);
+        }
+      }
+    }
+  }
+
+  const classNameNode = getChildByField(classNode, 'name') ?? classNode;
+  const className = classRec?.name ?? getNodeText(classNameNode, ctx.source).trim();
+
+  // Create one synthetic public method unless that name is already taken
+  // (hand-written, or synthesized earlier in this call).
+  const emitMethod = (
+    name: string,
+    anchor: SyntaxNode,
+    signature: string,
+    fromAnnotation: string,
+    extra: { returnType?: string; isStatic?: boolean } = {}
+  ): void => {
+    if (!name || takenMethods.has(name)) return;
+    takenMethods.add(name);
+    ctx.createNode('method', name, anchor, {
+      visibility: 'public',
+      signature,
+      docstring: `Lombok-generated (${fromAnnotation})`,
+      decorators: ['lombok'],
+      isStatic: extra.isStatic,
+      returnType: extra.returnType,
+    });
+  };
+
+  // Per-field getters/setters.
+  for (const fd of fields) {
+    const mods = modifierTextOf(fd);
+    if (/\bstatic\b/.test(mods)) continue; // Lombok skips static fields.
+    const isFinal = /\bfinal\b/.test(mods);
+    const fieldAnns = lombokAnnotationNames(fd);
+    const fieldGetter = fieldAnns.has('Getter');
+    const fieldSetter = fieldAnns.has('Setter');
+
+    const wantGetter = classGetter || isData || isValue || fieldGetter;
+    const wantSetter = (classSetter || isData || fieldSetter) && !isFinal;
+    if (!wantGetter && !wantSetter) continue;
+
+    const typeNode = getChildByField(fd, 'type');
+    const typeText = typeNode ? getNodeText(typeNode, ctx.source).trim() : 'Object';
+    const isBooleanPrimitive = typeNode?.type === 'boolean_type';
+    const returnType = normalizeJavaType(typeNode, ctx.source);
+
+    // One declaration may introduce several variables (`int a, b;`).
+    for (const vd of fd.namedChildren) {
+      if (vd.type !== 'variable_declarator') continue;
+      const nameNode = getChildByField(vd, 'name');
+      if (!nameNode) continue;
+      const fieldName = getNodeText(nameNode, ctx.source).trim();
+      if (!fieldName) continue;
+
+      if (wantGetter) {
+        const g = lombokGetterName(fieldName, isBooleanPrimitive);
+        emitMethod(g, nameNode, `${typeText} ${g}()`,
+          fieldGetter ? '@Getter' : isData ? '@Data' : isValue ? '@Value' : '@Getter',
+          { returnType });
+      }
+      if (wantSetter) {
+        const s = lombokSetterName(fieldName, isBooleanPrimitive);
+        emitMethod(s, nameNode, `void ${s}(${typeText} ${fieldName})`,
+          fieldSetter ? '@Setter' : isData ? '@Data' : '@Setter');
+      }
+    }
+  }
+
+  // Class-level synthesized methods.
+  if (hasBuilder) {
+    emitMethod('builder', classNameNode, `static ${className}.${className}Builder builder()`,
+      classAnns.has('SuperBuilder') ? '@SuperBuilder' : '@Builder',
+      { isStatic: true, returnType: `${className}Builder` });
+  }
+  if (hasToString) {
+    emitMethod('toString', classNameNode, 'String toString()',
+      isData ? '@Data' : isValue ? '@Value' : '@ToString');
+  }
+  if (hasEquals) {
+    const from = isData ? '@Data' : isValue ? '@Value' : '@EqualsAndHashCode';
+    emitMethod('equals', classNameNode, 'boolean equals(Object o)', from);
+    emitMethod('hashCode', classNameNode, 'int hashCode()', from);
+  }
+
+  // Logger field (@Slf4j and friends).
+  if (logAnn && !takenFields.has('log')) {
+    takenFields.add('log');
+    ctx.createNode('field', 'log', classNameNode, {
+      visibility: 'private',
+      isStatic: true,
+      signature: 'Logger log',
+      docstring: `Lombok-generated (@${logAnn})`,
+      decorators: ['lombok'],
+    });
+  }
+}
+
 export const javaExtractor: LanguageExtractor = {
 export const javaExtractor: LanguageExtractor = {
   functionTypes: [],
   functionTypes: [],
   classTypes: ['class_declaration'],
   classTypes: ['class_declaration'],
@@ -58,6 +272,7 @@ export const javaExtractor: LanguageExtractor = {
   paramsField: 'parameters',
   paramsField: 'parameters',
   returnField: 'type',
   returnField: 'type',
   getReturnType: extractJavaReturnType,
   getReturnType: extractJavaReturnType,
+  synthesizeMembers: synthesizeLombokMembers,
   getSignature: (node, source) => {
   getSignature: (node, source) => {
     const params = getChildByField(node, 'parameters');
     const params = getChildByField(node, 'parameters');
     const returnType = getChildByField(node, 'type');
     const returnType = getChildByField(node, 'type');

+ 14 - 0
src/extraction/tree-sitter-types.ts

@@ -174,6 +174,20 @@ export interface LanguageExtractor {
    */
    */
   visitNode?: (node: SyntaxNode, ctx: ExtractorContext) => boolean;
   visitNode?: (node: SyntaxNode, ctx: ExtractorContext) => boolean;
 
 
+  /**
+   * Optional hook: synthesize members that exist at compile time but not in
+   * the source AST.
+   *
+   * Called at the end of class extraction with the class still on the scope
+   * stack (so `ctx.createNode` attaches containment + qualified names) and the
+   * class's real members already extracted (so the hook can skip a member the
+   * source explicitly declares). Used by Java for Lombok-generated accessors
+   * (`@Getter`/`@Setter`/`@Data`/`@Value`/`@Builder` → `getX`/`setX`/`builder`/
+   * `equals`/`hashCode`/`toString` + the `log` field), which are otherwise
+   * invisible and break call-chain analysis (#912). The created nodes carry a
+   * `lombok` decorator + a docstring naming the generating annotation, so an
+   * agent can tell them apart from hand-written code.
+   *
+   * @param classNode The class declaration node whose extraction is finishing.
+   * @param ctx       Extraction context used to inspect existing nodes and
+   *                  create the synthesized ones.
+   */
+  synthesizeMembers?: (classNode: SyntaxNode, ctx: ExtractorContext) => void;
+
   /**
   /**
    * Classify a class_declaration node when the grammar reuses one node type
    * Classify a class_declaration node when the grammar reuses one node type
    * for multiple concepts (e.g. Swift uses class_declaration for classes, structs, and enums).
    * for multiple concepts (e.g. Swift uses class_declaration for classes, structs, and enums).

+ 8 - 0
src/extraction/tree-sitter.ts

@@ -1530,6 +1530,14 @@ export class TreeSitterExtractor {
         this.visitNode(child);
         this.visitNode(child);
       }
       }
     }
     }
+
+    // Synthesize compile-time-generated members (Lombok accessors, #912). Runs
+    // after the body so the hook can dedup against hand-written members, and
+    // while the class is still on the stack so containment/QNs attach.
+    if (this.extractor.synthesizeMembers) {
+      this.extractor.synthesizeMembers(node, this.makeExtractorContext());
+    }
+
     this.nodeStack.pop();
     this.nodeStack.pop();
   }
   }
 
 

+ 132 - 44
src/mcp/tools.ts

@@ -134,7 +134,7 @@ export interface ExploreOutputBudget {
   maxCharsPerFile: number;
   maxCharsPerFile: number;
   /** Cluster gap threshold in lines — tighter clustering on small projects. */
   /** Cluster gap threshold in lines — tighter clustering on small projects. */
   gapThreshold: number;
   gapThreshold: number;
-  /** Max symbols listed in the per-file header (`#### path — sym(kind), ...`). */
+  /** Max symbols listed in the per-file header (``**`path`** — sym(kind), ...``). */
   maxSymbolsInFileHeader: number;
   maxSymbolsInFileHeader: number;
   /** Max edges shown per relationship kind in the Relationships section. */
   /** Max edges shown per relationship kind in the Relationships section. */
   maxEdgesPerRelationshipKind: number;
   maxEdgesPerRelationshipKind: number;
@@ -288,6 +288,27 @@ function adaptiveExploreEnabled(): boolean {
   return process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== '0' && process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== 'false';
   return process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== '0' && process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== 'false';
 }
 }
 
 
+/**
+ * How long the FIRST tool call waits on the post-open catch-up reconcile before
+ * giving up and serving anyway (issue #905). On a normal repo the reconcile
+ * finishes in well under this, so the gate is fully honored and nothing changes.
+ * On a very large repo (~100k files) the reconcile takes minutes — blocking the
+ * first call on all of it presents as a multi-minute hang — so we wait briefly
+ * for a clean answer, then serve and let the reconcile finish in the background
+ * (it yields to the event loop, so a concurrent read still runs).
+ *
+ * `CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS` overrides the default; `0` restores the
+ * old unbounded-wait behavior (always block until the reconcile completes).
+ *
+ * Parsing rules:
+ *  - unset, empty, or whitespace-only → the default (a blank value must not be
+ *    read as `0`, which would silently select the unbounded wait);
+ *  - non-numeric, non-finite, or negative → the default;
+ *  - exactly `0` → `0` (explicit opt-in to the unbounded wait);
+ *  - any other value is floored to whole milliseconds, but never below 1 — a
+ *    positive sub-millisecond value stays a (very short) timeout instead of
+ *    flooring to `0` and flipping to "wait forever".
+ *
+ * @returns The gate timeout in whole milliseconds; `0` means "no timeout".
+ */
+const DEFAULT_CATCHUP_GATE_TIMEOUT_MS = 3000;
+function resolveCatchUpGateTimeoutMs(): number {
+  const raw = process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS?.trim();
+  // `Number('')` and `Number('   ')` are both 0, so blank input is rejected up front.
+  if (!raw) return DEFAULT_CATCHUP_GATE_TIMEOUT_MS;
+  const n = Number(raw);
+  if (!Number.isFinite(n) || n < 0) return DEFAULT_CATCHUP_GATE_TIMEOUT_MS;
+  if (n === 0) return 0;
+  return Math.max(1, Math.floor(n));
+}
+
 /**
 /**
  * Prefix each line of a source slice with its 1-based line number, matching
  * Prefix each line of a source slice with its 1-based line number, matching
  * the Read tool's `cat -n` convention (number + tab) so the agent treats it
  * the Read tool's `cat -n` convention (number + tab) so the agent treats it
@@ -305,6 +326,23 @@ function numberSourceLines(slice: string, firstLineNumber: number): string {
   return out.join('\n');
   return out.join('\n');
 }
 }
 
 
+/**
+ * Line prefix that opens every per-file source section in codegraph_explore
+ * output, plus the helper that renders the full header line.
+ *
+ * Issue #778: tool results use bold labels instead of ATX headings (`####`,
+ * `##`, `###`) so Markdown-rendering MCP clients (e.g. the Claude Code VSCode
+ * extension) stop inflating every header to H1–H4. The path is rendered bold
+ * inside a code span so it still reads as a header. The leading bold-plus-
+ * backtick prefix is meant to be a unique, greppable marker: the explore
+ * truncation boundary (`handleExplore`) and the offload chunker
+ * (`reasoning/reasoner.ts`) key off it to cut on whole file sections.
+ *
+ * @param filePath Path shown inside the code span.
+ * @param suffix   Text appended after an em-dash separator; when empty, the
+ *                 header is just the bold path.
+ * @returns The header line for the file section.
+ */
+const FILE_SECTION_PREFIX = '**`';
+function fileSectionHeader(filePath: string, suffix: string): string {
+  const boldPath = FILE_SECTION_PREFIX + filePath + '`**';
+  if (!suffix) return boldPath;
+  return boldPath + ' — ' + suffix;
+}
+
 /**
 /**
  * Per-file staleness banner emitted at the top of a tool response when the
  * Per-file staleness banner emitted at the top of a tool response when the
  * file watcher has pending events for files referenced by the response.
  * file watcher has pending events for files referenced by the response.
@@ -667,7 +705,9 @@ export class ToolHandler {
   // this, a tool call that races past `catchUpSync()` serves rows for files
   // this, a tool call that races past `catchUpSync()` serves rows for files
   // that were deleted (or edited) while no MCP server was running — and the
   // that were deleted (or edited) while no MCP server was running — and the
   // per-file staleness banner can't help, because `getPendingFiles()` is
   // per-file staleness banner can't help, because `getPendingFiles()` is
-  // populated by the watcher, not by catch-up. Cleared on first await so
+  // populated by the watcher, not by catch-up. The wait is time-boxed
+  // (see {@link resolveCatchUpGateTimeoutMs}) so a minutes-long reconcile on a
+  // huge repo can't hang the first call (#905); cleared on first await so
   // subsequent calls don't pay any cost.
   // subsequent calls don't pay any cost.
   private catchUpGate: Promise<void> | null = null;
   private catchUpGate: Promise<void> | null = null;
 
 
@@ -691,6 +731,43 @@ export class ToolHandler {
     this.catchUpGate = p;
     this.catchUpGate = p;
   }
   }
 
 
+  /**
+   * Await the catch-up gate, but no longer than the configured timeout (#905).
+   * If the reconcile settles first, we got the fully-reconciled answer. If the
+   * timeout wins, we serve the call now and let the reconcile finish in the
+   * background — it yields to the event loop (see SYNC_RECONCILE_YIELD_INTERVAL),
+   * so a concurrent read still runs against the same connection. Never throws:
+   * a failed reconcile is logged by the engine, and we serve best-effort over
+   * the same potentially-stale data the un-gated path would have.
+   */
+  private async awaitCatchUpGate(gate: Promise<void>): Promise<void> {
+    const timeoutMs = resolveCatchUpGateTimeoutMs(); // re-read from the environment on each call
+    if (timeoutMs <= 0) {
+      // 0 = opt back into the original unbounded wait.
+      try { await gate; } catch { /* engine already logged */ }
+      return;
+    }
+    let timer: NodeJS.Timeout | undefined;
+    const timedOut = new Promise<'timeout'>((resolve) => {
+      timer = setTimeout(() => resolve('timeout'), timeoutMs);
+      timer.unref?.(); // a pending timeout must not, on its own, keep the Node process alive
+    });
+    try {
+      const outcome = await Promise.race([
+        gate.then(() => 'done' as const, () => 'done' as const), // a rejected gate also maps to 'done', so the race itself never rejects
+        timedOut,
+      ]);
+      if (outcome === 'timeout') {
+        process.stderr.write(
+          `[CodeGraph MCP] Catch-up reconcile still running after ${timeoutMs}ms; serving this tool call now and finishing the reconcile in the background (#905). ` +
+          `Set CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS=0 to always wait for it.\n`
+        );
+      }
+    } finally {
+      if (timer) clearTimeout(timer); // cancel the still-pending timeout when the gate settled first
+    }
+  }
+
   /**
   /**
    * Record the directory the server tried to resolve the default project from.
    * Record the directory the server tried to resolve the default project from.
    * Used only to make the "no default project" error actionable.
    * Used only to make the "no default project" error actionable.
@@ -1128,13 +1205,16 @@ export class ToolHandler {
     try {
     try {
       // Block the first tool call on the engine's post-open reconcile so we
       // Block the first tool call on the engine's post-open reconcile so we
       // never serve rows for files deleted/edited while no MCP server was
       // never serve rows for files deleted/edited while no MCP server was
-      // running. The gate is cleared after first await — subsequent calls
-      // pay nothing. Catch-up failures are logged by the engine; we
-      // proceed regardless so a transient sync error never breaks tools.
+      // running. The wait is time-boxed (#905): a huge-repo reconcile takes
+      // minutes, and blocking the first call on all of it reads as a hang, so
+      // we wait briefly then serve and let it finish in the background. The
+      // gate is cleared after first await — subsequent calls pay nothing.
+      // Catch-up failures are logged by the engine; we proceed regardless so a
+      // transient sync error never breaks tools.
       if (this.catchUpGate) {
       if (this.catchUpGate) {
         const gate = this.catchUpGate;
         const gate = this.catchUpGate;
         this.catchUpGate = null;
         this.catchUpGate = null;
-        try { await gate; } catch { /* engine already logged */ }
+        await this.awaitCatchUpGate(gate);
       }
       }
       // Honor the optional tool allowlist (CODEGRAPH_MCP_TOOLS): a trimmed
       // Honor the optional tool allowlist (CODEGRAPH_MCP_TOOLS): a trimmed
       // surface rejects ablated tools defensively even if a client cached them.
       // surface rejects ablated tools defensively even if a client cached them.
@@ -1287,7 +1367,7 @@ export class ToolHandler {
   private definitionHeading(group: Node[]): string {
   private definitionHeading(group: Node[]): string {
     const head = group[0]!;
     const head = group[0]!;
     const line = head.startLine ? `:${head.startLine}` : '';
     const line = head.startLine ? `:${head.startLine}` : '';
-    return `### ${head.qualifiedName} (${head.kind}) — ${head.filePath}${line}`;
+    return `**${head.qualifiedName}** (${head.kind}) — ${head.filePath}${line}`;
   }
   }
 
 
   /**
   /**
@@ -1345,7 +1425,7 @@ export class ToolHandler {
     // agent never mistakes one app's callers for another's. Narrow with
     // agent never mistakes one app's callers for another's. Narrow with
     // `file` to focus a single definition.
     // `file` to focus a single definition.
     const lines: string[] = [
     const lines: string[] = [
-      `## Callers of ${symbol} — ${groups.length} distinct definitions (narrow with \`file\`)`,
+      `**Callers of ${symbol} — ${groups.length} distinct definitions (narrow with \`file\`)**`,
     ];
     ];
     for (const group of groups) {
     for (const group of groups) {
       const { callers, labels } = collect(group);
       const { callers, labels } = collect(group);
@@ -1415,7 +1495,7 @@ export class ToolHandler {
 
 
     // Multiple DISTINCT definitions (#764): per-definition sections.
     // Multiple DISTINCT definitions (#764): per-definition sections.
     const lines: string[] = [
     const lines: string[] = [
-      `## Callees of ${symbol} — ${groups.length} distinct definitions (narrow with \`file\`)`,
+      `**Callees of ${symbol} — ${groups.length} distinct definitions (narrow with \`file\`)**`,
     ];
     ];
     for (const group of groups) {
     for (const group of groups) {
       const { callees, labels } = collect(group);
       const { callees, labels } = collect(group);
@@ -1484,7 +1564,7 @@ export class ToolHandler {
     // merging unrelated same-named classes (one UserService per monorepo app)
     // merging unrelated same-named classes (one UserService per monorepo app)
     // overstated impact and confused agents. Narrow with `file`.
     // overstated impact and confused agents. Narrow with `file`.
     const sections: string[] = [
     const sections: string[] = [
-      `## Impact of ${symbol} — ${groups.length} distinct definitions (each with its own blast radius; narrow with \`file\`)`,
+      `**Impact of ${symbol} — ${groups.length} distinct definitions (each with its own blast radius; narrow with \`file\`)**`,
     ];
     ];
     for (const group of groups) {
     for (const group of groups) {
       const head = group[0]!;
       const head = group[0]!;
@@ -1563,6 +1643,14 @@ export class ToolHandler {
         registeredAt,
         registeredAt,
       };
       };
     }
     }
+    if (m?.synthesizedBy === 'fn-pointer-dispatch') {
+      const via = m.via ? `\`${String(m.via)}\`` : 'a function pointer';
+      return {
+        label: `function-pointer dispatch via ${via} (dynamic dispatch)`,
+        compact: `dynamic: fn-pointer ${m.via ? String(m.via) : ''}${at}`,
+        registeredAt,
+      };
+    }
     // Generic fallback for any other synthesizer (redux-thunk, gin-middleware-chain,
     // Generic fallback for any other synthesizer (redux-thunk, gin-middleware-chain,
     // flutter-build, …): a synthesized hop must never read as a bare static `calls`.
     // flutter-build, …): a synthesized hop must never read as a bare static `calls`.
     // It's a dynamic-dispatch bridge — label it as one and keep its wiring site.
     // It's a dynamic-dispatch bridge — label it as one and keep its wiring site.
@@ -1702,7 +1790,7 @@ export class ToolHandler {
         if (synthLines.length === 0 && !boundaries) return EMPTY;
         if (synthLines.length === 0 && !boundaries) return EMPTY;
         const out: string[] = [];
         const out: string[] = [];
         if (synthLines.length) out.push(
         if (synthLines.length) out.push(
-          '## Dynamic-dispatch links among your symbols',
+          '**Dynamic-dispatch links among your symbols**',
           '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)',
           '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)',
           '', ...synthLines, '');
           '', ...synthLines, '');
         if (boundaries) out.push(boundaries);
         if (boundaries) out.push(boundaries);
@@ -1817,7 +1905,7 @@ export class ToolHandler {
       if (!hasMain && synthLines.length === 0 && !boundaryText && !polyText) return EMPTY;
       if (!hasMain && synthLines.length === 0 && !boundaryText && !polyText) return EMPTY;
       const out: string[] = [];
       const out: string[] = [];
       if (hasMain) {
       if (hasMain) {
-        out.push('## Flow (call path among the symbols you queried)', '');
+        out.push('**Flow (call path among the symbols you queried)**', '');
         for (let i = 0; i < best!.length; i++) {
         for (let i = 0; i < best!.length; i++) {
           const step = best![i]!;
           const step = best![i]!;
           if (step.edge) { const sy = this.synthEdgeNote(step.edge); out.push(`   ↓ ${sy ? sy.compact : step.edge.kind}`); }
           if (step.edge) { const sy = this.synthEdgeNote(step.edge); out.push(`   ↓ ${sy ? sy.compact : step.edge.kind}`); }
@@ -1827,7 +1915,7 @@ export class ToolHandler {
       }
       }
       if (synthLines.length) {
       if (synthLines.length) {
         out.push(
         out.push(
-          '## Dynamic-dispatch links among your symbols',
+          '**Dynamic-dispatch links among your symbols**',
           '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)',
           '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)',
           '',
           '',
           ...synthLines,
           ...synthLines,
@@ -1895,7 +1983,7 @@ export class ToolHandler {
     }
     }
     if (notes.length === 0) return '';
     if (notes.length === 0) return '';
     return [
     return [
-      '## Dynamic boundaries (the static path ends at runtime dispatch)',
+      '**Dynamic boundaries (the static path ends at runtime dispatch)**',
       '',
       '',
       ...notes,
       ...notes,
       '',
       '',
@@ -1982,7 +2070,7 @@ export class ToolHandler {
     }
     }
     if (notes.length === 0) return '';
     if (notes.length === 0) return '';
     return [
     return [
-      '## Interface dispatch (a named method has many implementations)',
+      '**Interface dispatch (a named method has many implementations)**',
       '',
       '',
       ...notes,
       ...notes,
       '',
       '',
@@ -2111,7 +2199,7 @@ export class ToolHandler {
     if (entries.length === 0) return '';
     if (entries.length === 0) return '';
 
 
     return [
     return [
-      '### Blast radius — what depends on these (update/verify before editing)',
+      '**Blast radius — what depends on these (update/verify before editing)**',
       '',
       '',
       ...entries,
       ...entries,
       '',
       '',
@@ -2580,7 +2668,7 @@ export class ToolHandler {
 
 
     // Step 3: Build relationship map
     // Step 3: Build relationship map
     const lines: string[] = [
     const lines: string[] = [
-      `## Exploration: ${query}`,
+      `**Exploration: ${query}**`,
       '',
       '',
       `Found ${subgraph.nodes.size} symbols across ${fileGroups.size} files.`,
       `Found ${subgraph.nodes.size} symbols across ${fileGroups.size} files.`,
       '',
       '',
@@ -2598,7 +2686,7 @@ export class ToolHandler {
     );
     );
 
 
     if (budget.includeRelationships && significantEdges.length > 0) {
     if (budget.includeRelationships && significantEdges.length > 0) {
-      lines.push('### Relationships');
+      lines.push('**Relationships**');
       lines.push('');
       lines.push('');
 
 
       // Group edges by kind for readability
       // Group edges by kind for readability
@@ -2685,7 +2773,7 @@ export class ToolHandler {
       return false;
       return false;
     };
     };
 
 
-    lines.push('### Source Code');
+    lines.push('**Source Code**');
     lines.push('');
     lines.push('');
     lines.push('> The code below is the **verbatim, current on-disk source** of these files — re-read from disk on this call and line-numbered, byte-for-byte identical to what the Read tool returns. It is NOT a summary, outline, or stale cache. Treat each block as a Read you have already performed: do not Read a file shown here.');
     lines.push('> The code below is the **verbatim, current on-disk source** of these files — re-read from disk on this call and line-numbered, byte-for-byte identical to what the Read tool returns. It is NOT a summary, outline, or stale cache. Treat each block as a Read you have already performed: do not Read a file shown here.');
     lines.push('');
     lines.push('');
@@ -2829,7 +2917,7 @@ export class ToolHandler {
           const tag = bodyIds.size > 0
           const tag = bodyIds.size > 0
             ? 'focused (the methods you named in full, the rest as signatures — codegraph_explore a signature by name for its body; do NOT Read)'
             ? 'focused (the methods you named in full, the rest as signatures — codegraph_explore a signature by name for its body; do NOT Read)'
             : 'skeleton (signatures only — codegraph_explore a name for its full body; do NOT Read)';
             : 'skeleton (signatures only — codegraph_explore a name for its full body; do NOT Read)';
-          lines.push(`#### ${filePath} — ${names} · ${tag}`, '', '```' + lang, skel.join('\n'), '```', '');
+          lines.push(fileSectionHeader(filePath, `${names} · ${tag}`), '', '```' + lang, skel.join('\n'), '```', '');
           totalChars += skel.join('\n').length + 120;
           totalChars += skel.join('\n').length + 120;
           filesIncluded++;
           filesIncluded++;
           continue;
           continue;
@@ -2870,7 +2958,7 @@ export class ToolHandler {
         )];
         )];
         const headerNames = uniqSymbols.slice(0, budget.maxSymbolsInFileHeader);
         const headerNames = uniqSymbols.slice(0, budget.maxSymbolsInFileHeader);
         const omitted = uniqSymbols.length - headerNames.length;
         const omitted = uniqSymbols.length - headerNames.length;
-        const wholeHeader = `#### ${filePath} — ${omitted > 0 ? `${headerNames.join(', ')}, +${omitted} more` : headerNames.join(', ')}`;
+        const wholeHeader = fileSectionHeader(filePath, omitted > 0 ? `${headerNames.join(', ')}, +${omitted} more` : headerNames.join(', '));
 
 
         if (!fileNecessary && totalChars + wholeSection.length + 200 > budget.maxOutputChars) {
         if (!fileNecessary && totalChars + wholeSection.length + 200 > budget.maxOutputChars) {
           // Don't slice a whole file mid-method: an incidental file that doesn't
           // Don't slice a whole file mid-method: an incidental file that doesn't
@@ -3137,7 +3225,7 @@ export class ToolHandler {
       const headerSuffix = omittedCount > 0
       const headerSuffix = omittedCount > 0
         ? `${headerSymbols.join(', ')}, +${omittedCount} more`
         ? `${headerSymbols.join(', ')}, +${omittedCount} more`
         : headerSymbols.join(', ');
         : headerSymbols.join(', ');
-      const fileHeader = `#### ${filePath} — ${headerSuffix}`;
+      const fileHeader = fileSectionHeader(filePath, headerSuffix);
 
 
       // The total cap bounds INCIDENTAL files only. A file that DEFINES a symbol
       // The total cap bounds INCIDENTAL files only. A file that DEFINES a symbol
       // the agent named (or that's on the flow spine) renders even when the
       // the agent named (or that's on the flow spine) renders even when the
@@ -3178,7 +3266,7 @@ export class ToolHandler {
         .sort((a, b) => b[1].score - a[1].score);
         .sort((a, b) => b[1].score - a[1].score);
       const remainingFiles = [...remainingRelevant, ...peripheralFiles];
       const remainingFiles = [...remainingRelevant, ...peripheralFiles];
       if (remainingFiles.length > 0) {
       if (remainingFiles.length > 0) {
-        lines.push('### Not shown above — explore these names for their source');
+        lines.push('**Not shown above — explore these names for their source**');
         lines.push('');
         lines.push('');
         for (const [filePath, group] of remainingFiles.slice(0, 10)) {
         for (const [filePath, group] of remainingFiles.slice(0, 10)) {
           const symbols = group.nodes.map(n => `${n.name}:${n.startLine}`).join(', ');
           const symbols = group.nodes.map(n => `${n.name}:${n.startLine}`).join(', ');
@@ -3227,13 +3315,13 @@ export class ToolHandler {
 
 
     const hardCeiling = Math.min(Math.round(budget.maxOutputChars * 1.5), 25000);
     const hardCeiling = Math.min(Math.round(budget.maxOutputChars * 1.5), 25000);
     if (output.length > hardCeiling) {
     if (output.length > hardCeiling) {
-      // Cut at a FILE-SECTION boundary (the last `#### ` header before the
+      // Cut at a FILE-SECTION boundary (the last ``**` `` file header before the
       // ceiling) so we drop whole trailing file-sections rather than slicing
       // ceiling) so we drop whole trailing file-sections rather than slicing
       // through a method body — a half-rendered method just forces the Read this
       // through a method body — a half-rendered method just forces the Read this
       // tool exists to prevent. Fall back to a line boundary only if no section
       // tool exists to prevent. Fall back to a line boundary only if no section
       // header sits in the back half (degenerate single-giant-section case).
       // header sits in the back half (degenerate single-giant-section case).
       const cut = output.slice(0, hardCeiling);
       const cut = output.slice(0, hardCeiling);
-      const lastSection = cut.lastIndexOf('\n#### ');
+      const lastSection = cut.lastIndexOf('\n' + FILE_SECTION_PREFIX);
       const boundary = lastSection > hardCeiling * 0.5 ? lastSection : cut.lastIndexOf('\n');
       const boundary = lastSection > hardCeiling * 0.5 ? lastSection : cut.lastIndexOf('\n');
       const safe = boundary > 0 ? cut.slice(0, boundary) : cut;
       const safe = boundary > 0 ? cut.slice(0, boundary) : cut;
       return this.textResult(safe + '\n\n... (output truncated to budget; the source above is complete and verbatim — treat it as already Read. For any area not covered, run another codegraph_explore with the specific names — do NOT Read these files.)');
       return this.textResult(safe + '\n\n... (output truncated to budget; the source above is complete and verbatim — treat it as already Read. For any area not covered, run another codegraph_explore with the specific names — do NOT Read these files.)');
@@ -3348,7 +3436,7 @@ export class ToolHandler {
       const shownList = listed.slice(0, LIST_CAP);
       const shownList = listed.slice(0, LIST_CAP);
       out.push(
       out.push(
         '',
         '',
-        '### Other definitions',
+        '**Other definitions**',
         ...shownList.map((n) => `- \`${n.name}\` (${n.kind}) — ${n.filePath}:${n.startLine}`),
         ...shownList.map((n) => `- \`${n.name}\` (${n.kind}) — ${n.filePath}:${n.startLine}`),
       );
       );
       if (listed.length > LIST_CAP) out.push(`- … +${listed.length - LIST_CAP} more`);
       if (listed.length > LIST_CAP) out.push(`- … +${listed.length - LIST_CAP} more`);
@@ -3430,7 +3518,7 @@ export class ToolHandler {
     // symbolsOnly → the cheap structural overview, no source.
     // symbolsOnly → the cheap structural overview, no source.
     if (opts.symbolsOnly) {
     if (opts.symbolsOnly) {
       const out = [`**${filePath}** — ${nodes.length} symbol${nodes.length === 1 ? '' : 's'}, ${depSummary}`, ''];
       const out = [`**${filePath}** — ${nodes.length} symbol${nodes.length === 1 ? '' : 's'}, ${depSummary}`, ''];
-      if (nodes.length) out.push(...symbolMap('### Symbols'));
+      if (nodes.length) out.push(...symbolMap('**Symbols**'));
       else out.push('_No indexed symbols in this file._');
       else out.push('_No indexed symbols in this file._');
       out.push('', '> Drop `symbolsOnly` (or pass `offset`/`limit`) to read the source, like Read.');
       out.push('', '> Drop `symbolsOnly` (or pass `offset`/`limit`) to read the source, like Read.');
       return this.textResult(this.truncateOutput(out.join('\n')));
       return this.textResult(this.truncateOutput(out.join('\n')));
@@ -3440,7 +3528,7 @@ export class ToolHandler {
     // line is `key: <secret>`. Summarize by key and point to a real Read.
     // line is `key: <secret>`. Summarize by key and point to a real Read.
     if (CONFIG_LEAF_LANGUAGES.has(resolved.language)) {
     if (CONFIG_LEAF_LANGUAGES.has(resolved.language)) {
       const out = [`**${filePath}** — configuration/data file, ${depSummary}`, ''];
       const out = [`**${filePath}** — configuration/data file, ${depSummary}`, ''];
-      if (nodes.length) out.push(...symbolMap('### Keys (values withheld for safety)'));
+      if (nodes.length) out.push(...symbolMap('**Keys (values withheld for safety)**'));
       out.push('', '> Values may be secrets, so codegraph indexes keys only. Read the file directly if you need a value.');
       out.push('', '> Values may be secrets, so codegraph indexes keys only. Read the file directly if you need a value.');
       return this.textResult(this.truncateOutput(out.join('\n')));
       return this.textResult(this.truncateOutput(out.join('\n')));
     }
     }
@@ -3454,7 +3542,7 @@ export class ToolHandler {
     }
     }
     if (content === null) {
     if (content === null) {
       const out = [`**${filePath}** — could not read from disk (it may have moved since indexing). ${depSummary}`, ''];
       const out = [`**${filePath}** — could not read from disk (it may have moved since indexing). ${depSummary}`, ''];
-      if (nodes.length) out.push(...symbolMap('### Symbols'));
+      if (nodes.length) out.push(...symbolMap('**Symbols**'));
       out.push('', `> Read \`${filePath}\` directly for its current content.`);
       out.push('', `> Read \`${filePath}\` directly for its current content.`);
       return this.textResult(this.truncateOutput(out.join('\n')));
       return this.textResult(this.truncateOutput(out.join('\n')));
     }
     }
@@ -3551,7 +3639,7 @@ export class ToolHandler {
     const callees = collect(cg.getCallees(node.id));
     const callees = collect(cg.getCallees(node.id));
     const callers = collect(cg.getCallers(node.id));
     const callers = collect(cg.getCallers(node.id));
     if (callees.length === 0 && callers.length === 0) return '';
     if (callees.length === 0 && callers.length === 0) return '';
-    const lines: string[] = ['', '### Trail — codegraph_node any of these to follow it (no Read needed)'];
+    const lines: string[] = ['', '**Trail — codegraph_node any of these to follow it (no Read needed)**'];
     if (callees.length > 0) {
     if (callees.length > 0) {
       lines.push(`**Calls →** ${callees.slice(0, TRAIL_CAP).map(fmt).join(', ')}${callees.length > TRAIL_CAP ? `, +${callees.length - TRAIL_CAP} more` : ''}`);
       lines.push(`**Calls →** ${callees.slice(0, TRAIL_CAP).map(fmt).join(', ')}${callees.length > TRAIL_CAP ? `, +${callees.length - TRAIL_CAP} more` : ''}`);
     }
     }
@@ -3587,7 +3675,7 @@ export class ToolHandler {
     const mismatch = this.worktreeMismatchFor(args.projectPath as string | undefined);
     const mismatch = this.worktreeMismatchFor(args.projectPath as string | undefined);
 
 
     const lines: string[] = [
     const lines: string[] = [
-      '## CodeGraph Status',
+      '**CodeGraph Status**',
       '',
       '',
     ];
     ];
     if (mismatch) {
     if (mismatch) {
@@ -3618,7 +3706,7 @@ export class ToolHandler {
       );
       );
     }
     }
 
 
-    lines.push('', '### Nodes by Kind:');
+    lines.push('', '**Nodes by Kind:**');
 
 
     for (const [kind, count] of Object.entries(stats.nodesByKind)) {
     for (const [kind, count] of Object.entries(stats.nodesByKind)) {
       if ((count as number) > 0) {
       if ((count as number) > 0) {
@@ -3626,7 +3714,7 @@ export class ToolHandler {
       }
       }
     }
     }
 
 
-    lines.push('', '### Languages:');
+    lines.push('', '**Languages:**');
     for (const [lang, count] of Object.entries(stats.filesByLanguage)) {
     for (const [lang, count] of Object.entries(stats.filesByLanguage)) {
       if ((count as number) > 0) {
       if ((count as number) > 0) {
         lines.push(`- ${lang}: ${count}`);
         lines.push(`- ${lang}: ${count}`);
@@ -3640,7 +3728,7 @@ export class ToolHandler {
     if (cg.isWatcherDegraded()) {
     if (cg.isWatcherDegraded()) {
       lines.push(
       lines.push(
         '',
         '',
-        '### Auto-sync disabled:',
+        '**Auto-sync disabled:**',
         `- ${cg.getWatcherDegradedReason() ?? 'live file watching stopped'}`,
         `- ${cg.getWatcherDegradedReason() ?? 'live file watching stopped'}`,
         '- The index is frozen; Read files directly for current content.'
         '- The index is frozen; Read files directly for current content.'
       );
       );
@@ -3652,7 +3740,7 @@ export class ToolHandler {
     // banners on other tool calls.
     // banners on other tool calls.
     const pending = cg.getPendingFiles();
     const pending = cg.getPendingFiles();
     if (pending.length > 0) {
     if (pending.length > 0) {
-      lines.push('', '### Pending sync:');
+      lines.push('', '**Pending sync:**');
       const now = Date.now();
       const now = Date.now();
       for (const p of pending) {
       for (const p of pending) {
         const ageMs = Math.max(0, now - p.lastSeenMs);
         const ageMs = Math.max(0, now - p.lastSeenMs);
@@ -3743,7 +3831,7 @@ export class ToolHandler {
    * Format files as a flat list
    * Format files as a flat list
    */
    */
   private formatFilesFlat(files: { path: string; language: string; nodeCount: number }[], includeMetadata: boolean): string {
   private formatFilesFlat(files: { path: string; language: string; nodeCount: number }[], includeMetadata: boolean): string {
-    const lines: string[] = [`## Files (${files.length})`, ''];
+    const lines: string[] = [`**Files (${files.length})**`, ''];
 
 
     for (const file of files.sort((a, b) => a.path.localeCompare(b.path))) {
     for (const file of files.sort((a, b) => a.path.localeCompare(b.path))) {
       if (includeMetadata) {
       if (includeMetadata) {
@@ -3768,13 +3856,13 @@ export class ToolHandler {
       byLang.set(file.language, existing);
       byLang.set(file.language, existing);
     }
     }
 
 
-    const lines: string[] = [`## Files by Language (${files.length} total)`, ''];
+    const lines: string[] = [`**Files by Language (${files.length} total)**`, ''];
 
 
     // Sort languages by file count (descending)
     // Sort languages by file count (descending)
     const sortedLangs = [...byLang.entries()].sort((a, b) => b[1].length - a[1].length);
     const sortedLangs = [...byLang.entries()].sort((a, b) => b[1].length - a[1].length);
 
 
     for (const [lang, langFiles] of sortedLangs) {
     for (const [lang, langFiles] of sortedLangs) {
-      lines.push(`### ${lang} (${langFiles.length})`);
+      lines.push(`**${lang} (${langFiles.length})**`);
       for (const file of langFiles.sort((a, b) => a.path.localeCompare(b.path))) {
       for (const file of langFiles.sort((a, b) => a.path.localeCompare(b.path))) {
         if (includeMetadata) {
         if (includeMetadata) {
           lines.push(`- ${file.path} (${file.nodeCount} symbols)`);
           lines.push(`- ${file.path} (${file.nodeCount} symbols)`);
@@ -3826,7 +3914,7 @@ export class ToolHandler {
     }
     }
 
 
     // Render tree
     // Render tree
-    const lines: string[] = [`## Project Structure (${files.length} files)`, ''];
+    const lines: string[] = [`**Project Structure (${files.length} files)**`, ''];
 
 
     const renderNode = (node: TreeNode, prefix: string, isLast: boolean, depth: number): void => {
     const renderNode = (node: TreeNode, prefix: string, isLast: boolean, depth: number): void => {
       if (maxDepth !== undefined && depth > maxDepth) return;
       if (maxDepth !== undefined && depth > maxDepth) return;
@@ -4039,13 +4127,13 @@ export class ToolHandler {
   // =========================================================================
   // =========================================================================
 
 
   private formatSearchResults(results: SearchResult[]): string {
   private formatSearchResults(results: SearchResult[]): string {
-    const lines: string[] = [`## Search Results (${results.length} found)`, ''];
+    const lines: string[] = [`**Search Results (${results.length} found)**`, ''];
 
 
     for (const result of results) {
     for (const result of results) {
       const { node } = result;
       const { node } = result;
       const location = node.startLine ? `:${node.startLine}` : '';
       const location = node.startLine ? `:${node.startLine}` : '';
       // Compact format: one line per result with key info
       // Compact format: one line per result with key info
-      lines.push(`### ${node.name} (${node.kind})`);
+      lines.push(`**${node.name}** (${node.kind})`);
       lines.push(`${node.filePath}${location}`);
       lines.push(`${node.filePath}${location}`);
       if (node.signature) lines.push(`\`${node.signature}\``);
       if (node.signature) lines.push(`\`${node.signature}\``);
       lines.push('');
       lines.push('');
@@ -4055,7 +4143,7 @@ export class ToolHandler {
   }
   }
 
 
   private formatNodeList(nodes: Node[], title: string, labels?: Map<string, string>): string {
   private formatNodeList(nodes: Node[], title: string, labels?: Map<string, string>): string {
-    const lines: string[] = [`## ${title} (${nodes.length} found)`, ''];
+    const lines: string[] = [`**${title} (${nodes.length} found)**`, ''];
 
 
     for (const node of nodes) {
     for (const node of nodes) {
       const location = node.startLine ? `:${node.startLine}` : '';
       const location = node.startLine ? `:${node.startLine}` : '';
@@ -4090,7 +4178,7 @@ export class ToolHandler {
 
 
     // Compact format: just list affected symbols grouped by file
     // Compact format: just list affected symbols grouped by file
     const lines: string[] = [
     const lines: string[] = [
-      `## Impact: "${symbol}" affects ${nodeCount} symbols`,
+      `**Impact: "${symbol}" affects ${nodeCount} symbols**`,
       '',
       '',
     ];
     ];
 
 
@@ -4138,7 +4226,7 @@ export class ToolHandler {
   private formatNodeDetails(node: Node, code: string | null, outline?: string | null): string {
   private formatNodeDetails(node: Node, code: string | null, outline?: string | null): string {
     const location = node.startLine ? `:${node.startLine}` : '';
     const location = node.startLine ? `:${node.startLine}` : '';
     const lines: string[] = [
     const lines: string[] = [
-      `## ${node.name} (${node.kind})`,
+      `**${node.name}** (${node.kind})`,
       '',
       '',
       `**Location:** ${node.filePath}${location}`,
       `**Location:** ${node.filePath}${location}`,
     ];
     ];

+ 9 - 7
src/reasoning/reasoner.ts

@@ -124,8 +124,8 @@ CORRECTNESS OVERRIDES EVERYTHING. Being incomplete is fine; being WRONG is not 
 const SYSTEM_PROMPT_REPORT = `${ROLE}
 const SYSTEM_PROMPT_REPORT = `${ROLE}
 
 
 Produce a single self-contained exploration report, formatted exactly like the summary a thorough senior engineer hands back after investigating. Clean Markdown, in this shape:
 Produce a single self-contained exploration report, formatted exactly like the summary a thorough senior engineer hands back after investigating. Clean Markdown, in this shape:
-- Open with the one-line coverage verdict (above). Then, ONLY if covered, a title: "## <Topic> — <Flow / Trace / Overview>". If coverage is not-found, the verdict + the names to explore next is the entire reply. NO preamble ("Here is", "Now I understand").
-- Body is numbered sections with bold headers: "### 1. **<step or aspect>**", "### 2. **<...>**", …
+- Open with the one-line coverage verdict (above). Then, ONLY if covered, a bold title: "**<Topic> — <Flow / Trace / Overview>**". If coverage is not-found, the verdict + the names to explore next is the entire reply. NO preamble ("Here is", "Now I understand"). Use bold labels for headers, never Markdown ATX headings (\`#\`/\`##\`) — they render oversized in some clients.
+- Body is numbered sections with bold headers: "**1. <step or aspect>**", "**2. <...>**", …
 - Cite every location inline and in bold as **\`path/to/file.ts:line\`** (or a line range), exactly as given in the source. Bold key classes, methods, and symbols.
 - Cite every location inline and in bold as **\`path/to/file.ts:line\`** (or a line range), exactly as given in the source. Bold key classes, methods, and symbols.
 - For a flow/path question, include a call-chain diagram in a fenced code block using down-arrows:
 - For a flow/path question, include a call-chain diagram in a fenced code block using down-arrows:
   \`\`\`
   \`\`\`
@@ -135,7 +135,7 @@ Produce a single self-contained exploration report, formatted exactly like the s
   \`\`\`
   \`\`\`
 - Quote only the code lines that carry the logic, in fenced code blocks, keeping their line numbers. Keep snippets tight.
 - Quote only the code lines that carry the logic, in fenced code blocks, keeping their line numbers. Keep snippets tight.
 - Separate major sections with a "---" rule.
 - Separate major sections with a "---" rule.
-- End with "### Summary" — the end-to-end chain in one compact block.
+- End with "**Summary**" — the end-to-end chain in one compact block.
 
 
 Be precise and dense — an engineer should be able to act from this report without opening a file.`;
 Be precise and dense — an engineer should be able to act from this report without opening a file.`;
 
 
@@ -172,11 +172,13 @@ export function stripAgentDirectives(context: string): string {
   let i = 0;
   let i = 0;
   while (i < lines.length) {
   while (i < lines.length) {
     const ln = lines[i] ?? '';
     const ln = lines[i] ?? '';
-    if (/^##\s+Exploration:/.test(ln) || /^Found \d+ symbols? across \d+ files?/.test(ln)) { i++; continue; }
-    // "Not shown above" pointer section: drop header + its bullets/blanks until the next rule/heading/blockquote.
-    if (/^###\s+Not shown above/i.test(ln)) {
+    // Headers are bold labels, not ATX headings (tools.ts, issue #778): the
+    // explore header is `**Exploration: …**`, file sections start with ``**` ``.
+    if (/^\*\*Exploration:/.test(ln) || /^Found \d+ symbols? across \d+ files?/.test(ln)) { i++; continue; }
+    // "Not shown above" pointer section: drop header + its bullets/blanks until the next rule/header/blockquote.
+    if (/^\*\*Not shown above/i.test(ln)) {
       i++;
       i++;
-      while (i < lines.length && !/^(---|#{2,4}\s|>\s)/.test(lines[i] ?? '')) i++;
+      while (i < lines.length && !/^(---|\*\*|>\s)/.test(lines[i] ?? '')) i++;
       continue;
       continue;
     }
     }
     // Agent-directed blockquote notes (completeness / budget / trimmed).
     // Agent-directed blockquote notes (completeness / budget / trimmed).

+ 359 - 0
src/resolution/c-fnptr-synthesizer.ts

@@ -0,0 +1,359 @@
+/**
+ * C/C++ function-pointer dispatch synthesis (#932).
+ *
+ * C/C++ polymorphism is the function pointer: a struct carries a fn-pointer
+ * field (`int (*fn)(int)`, or a fn-pointer-typedef field `hook_func func`),
+ * concrete functions are *registered* into it through a table
+ * (`static struct cmd cmds[] = {{"add", cmd_add}, …}`, a designated
+ * `.fn = cmd_add`, or `x->fn = cmd_add`), and the dispatcher calls through it
+ * indirectly (`p->fn(argv)`). Static extraction captures neither the
+ * registration→field binding nor the indirect call, so the dispatcher→handler
+ * edge is missing and `git`'s `run_builtin` looks like it calls nothing, the
+ * hooks in `hook_demo.c` are unreachable, etc.
+ *
+ * This bridges it, keyed by **(struct type, fn-pointer field)**:
+ *   • registrations — a function bound to `S.field` via a positional
+ *     initializer (matched by field index), a designated `.field = fn`, or a
+ *     direct `x.field = fn` / `x->field = fn` assignment;
+ *   • dispatch — `recv->field(…)` / `recv.field(…)` where `recv` resolves to a
+ *     value of struct type `S` (from the enclosing function's params / locals),
+ *     falling back to the field name when it is unique to one struct;
+ *   • field←field propagation — `a->f = b->g` merges `B.g`'s handlers into
+ *     `A.f`, so a generic single-slot hook that is reassigned from a registry
+ *     (the `hook_demo.c` shape: `h->func = found->fn`) still resolves.
+ *
+ * Whole-graph pass after base resolution; all edges are `provenance:'heuristic'`
+ * (`synthesizedBy:'fn-pointer-dispatch'`). High precision via the (type, field)
+ * key + a real-function gate; a project with no fn-pointer dispatch is a no-op.
+ */
+import type { Edge, Node } from '../types';
+import type { QueryBuilder } from '../db/queries';
+import type { ResolutionContext } from './types';
+import { stripCommentsForRegex } from './strip-comments';
+
+/** File extensions treated as C/C++ sources and headers (matched case-insensitively). */
+const C_CPP_EXT = /\.(c|h|cc|cpp|cxx|hpp|hh|hxx|cppm|ipp|inl|tcc)$/i;
+/** Node kinds accepted as handler targets when a registered name is resolved to a node. */
+const FN_KINDS = new Set(['function', 'method']);
+/** Upper bound on synthesized edges emitted per dispatching function. */
+const FANOUT_CAP = 300; // a real command table (git ~150) is legitimate fan-out; this only stops pathological cases.
+
+/** A struct field, in declaration order, flagged when it is a function pointer. */
+interface FieldInfo {
+  /** Field identifier as written in the struct body. */
+  name: string;
+  /** Zero-based position among the struct's parsed fields; used to match positional initializers. */
+  index: number;
+  /** True for `(*name)(…)` declarators and for fields whose type is a function-pointer typedef. */
+  isFnPtr: boolean;
+}
+
+/**
+ * Extract the 1-based, inclusive line range `startLine..endLine` from `content`.
+ *
+ * @param content   Full text, split on `\n`.
+ * @param startLine First line to keep; a missing or zero value yields `''`.
+ * @param endLine   Last line to keep; defaults to `startLine` (a single line).
+ * @returns The selected lines re-joined with `\n`.
+ */
+function sliceLines(content: string, startLine?: number, endLine?: number): string {
+  if (!startLine) {
+    return '';
+  }
+  const lastLine = endLine ?? startLine;
+  const rows = content.split('\n');
+  // `slice` is end-exclusive and 0-based, so the 1-based inclusive `lastLine` is used as-is.
+  const picked = rows.slice(startLine - 1, lastLine);
+  return picked.join('\n');
+}
+
+/**
+ * If a C string literal (`"…"`) or character literal (`'x'`, `'\n'`) starts at
+ * `i`, return the index of its closing quote; otherwise return `i` unchanged.
+ *
+ * Used so that braces, parens and commas inside literals are not mistaken for
+ * structure. An unterminated literal is treated as "not a literal". A lone `'`
+ * that does not look like a character literal (e.g. a C++14 digit separator in
+ * `1'000'000`) is left alone.
+ */
+function skipLiteral(src: string, i: number): number {
+  const q = src[i];
+  if (q === '"') {
+    for (let j = i + 1; j < src.length; j++) {
+      const c = src[j];
+      if (c === '\\') j++; // skip the escaped character (also covers backslash-newline)
+      else if (c === '"') return j;
+      else if (c === '\n') break; // unterminated on this line
+    }
+    return i;
+  }
+  if (q === "'") {
+    if (src[i + 1] === '\\') {
+      // Escape sequence: `'\n'`, `'\''`, `'\x41'`, … — bounded scan for the closer.
+      for (let j = i + 3; j < src.length && j <= i + 12; j++) {
+        if (src[j] === "'") return j;
+        if (src[j] === '\n') break;
+      }
+      return i;
+    }
+    if (i + 2 < src.length && src[i + 1] !== "'" && src[i + 2] === "'") return i + 2;
+  }
+  return i;
+}
+
+/**
+ * Index of the `}` matching the `{` at `open` (which must point at a `{`).
+ * Braces inside string / character literals are ignored.
+ *
+ * @returns The index of the matching `}`, or -1 if the braces are unbalanced.
+ */
+function matchBrace(src: string, open: number): number {
+  let depth = 0;
+  for (let i = open; i < src.length; i++) {
+    const litEnd = skipLiteral(src, i);
+    if (litEnd !== i) {
+      i = litEnd; // resume after the closing quote
+      continue;
+    }
+    const c = src[i];
+    if (c === '{') depth++;
+    else if (c === '}') {
+      depth--;
+      if (depth === 0) return i;
+    }
+  }
+  return -1;
+}
+
+/**
+ * Split `body` on `sep` at brace/paren/bracket depth 0 (commas inside `{…}` /
+ * `(…)` stay together). Separators and brackets inside string / character
+ * literals are ignored, so `"a,b"` is never split and `"("` does not change depth.
+ *
+ * @param body Text to split.
+ * @param sep  Single-character separator.
+ * @returns The pieces in order, untrimmed; always at least one element.
+ */
+function splitTopLevel(body: string, sep: string): string[] {
+  const out: string[] = [];
+  let depth = 0;
+  let start = 0;
+  for (let i = 0; i < body.length; i++) {
+    const litEnd = skipLiteral(body, i);
+    if (litEnd !== i) {
+      i = litEnd;
+      continue;
+    }
+    const c = body[i]!;
+    if (c === '{' || c === '(' || c === '[') depth++;
+    else if (c === '}' || c === ')' || c === ']') depth--;
+    else if (c === sep && depth === 0) {
+      out.push(body.slice(start, i));
+      start = i + 1;
+    }
+  }
+  out.push(body.slice(start));
+  return out;
+}
+
+/** A fn-pointer field looks like `… (*name)(…)` — capture `name`. Non-global, so safe to use with `String.match`. */
+const FNPTR_DECL_RE = /\(\s*\*\s*(\w+)\s*\)\s*\(/;
+/**
+ * `typedef RET (*NAME)(…)` — a function-pointer typedef; captures `NAME`.
+ * Global (`g`) and therefore stateful: reset `lastIndex` before each scan.
+ */
+const FNPTR_TYPEDEF_RE = /\btypedef\b[^;{}]*?\(\s*\*\s*(\w+)\s*\)\s*\(/g;
+
+/**
+ * Parse one struct member declaration (the text between two top-level `;`) into
+ * its data-member declarators, in declaration order.
+ *
+ * Every returned entry occupies one positional-initializer slot, so this must
+ * recognise all data members — not only the fn-pointer ones — or later fields
+ * end up with the wrong index. Handles `(*name)(…)` declarators, array members
+ * (`char name[16]`), bit-fields (`unsigned f : 3`), any pointer spelling
+ * (`T *x`, `T* x`, `T **x`), default member initializers and multi-declarator
+ * members (`int a, b`). Member functions, `static` / `typedef` / `using` /
+ * `friend` declarations and unnamed bit-fields yield no entry.
+ */
+function parseFieldDecl(
+  decl: string,
+  fnPtrTypedefs: ReadonlySet<string>,
+): { name: string; isFnPtr: boolean }[] {
+  const ptr = decl.match(FNPTR_DECL_RE);
+  if (ptr) return [{ name: ptr[1]!, isFnPtr: true }];
+  // A `(` outside array extents means a member function (or an attribute we
+  // cannot parse) — not an aggregate slot.
+  if (decl.replace(/\[[^\]]*\]/g, '').includes('(')) return [];
+  if (/^(?:static|typedef|using|friend)\b/.test(decl)) return [];
+
+  const out: { name: string; isFnPtr: boolean }[] = [];
+  let isFnPtr = false;
+  const parts = splitTopLevel(decl, ',');
+  for (let k = 0; k < parts.length; k++) {
+    const core = parts[k]!
+      .replace(/=[\s\S]*$/, '') // default member initializer
+      .replace(/(^|[^:]):\s*\w+\s*$/, '$1') // bit-field width (but not `::`)
+      .replace(/(?:\s*\[[^\]]*\])+\s*$/, ''); // array extents
+    const nm = core.match(/(\w+)\s*$/);
+    if (!nm) {
+      if (k === 0) return [];
+      continue;
+    }
+    if (k === 0) {
+      // The first declarator carries the type: the identifier just before the name.
+      const head = core.slice(0, nm.index);
+      if (!head.trim()) return []; // a bare identifier is not a declaration
+      const ty = head.match(/(\w+)[\s*&]*$/);
+      isFnPtr = !!ty && fnPtrTypedefs.has(ty[1]!);
+    }
+    out.push({ name: nm[1]!, isFnPtr });
+  }
+  return out;
+}
+
+/**
+ * Synthesize dispatcher→handler `calls` edges for C/C++ function-pointer
+ * dispatch, keyed by (struct type, fn-pointer field).
+ *
+ * Passes: (A) collect fn-pointer typedef names, (B) parse struct field layouts,
+ * (C) collect registrations from `= { … }` initializers, (D) propagate handlers
+ * across `a->f = b->g` assignments, (E) turn `recv->field(…)` / `recv.field(…)`
+ * call sites into edges.
+ *
+ * @param queries Query layer used to stream function / method nodes.
+ * @param ctx     Resolution context (file list, file contents, node lookups).
+ * @returns Heuristic `calls` edges tagged `synthesizedBy: 'fn-pointer-dispatch'`;
+ *          empty when there are no C/C++ files, no struct with a fn-pointer
+ *          field, or no registrations.
+ */
+export function cFnPointerDispatchEdges(queries: QueryBuilder, ctx: ResolutionContext): Edge[] {
+  const files = ctx.getAllFiles().filter((f) => C_CPP_EXT.test(f));
+  if (files.length === 0) return [];
+
+  // Cache stripped source per file (read once, reused across passes).
+  // Unreadable files are cached as '' so later `srcCache.get` lookups skip them.
+  const srcCache = new Map<string, string>();
+  const src = (file: string): string | null => {
+    if (srcCache.has(file)) return srcCache.get(file)!;
+    const raw = ctx.readFile(file);
+    const s = raw == null ? '' : stripCommentsForRegex(raw, 'c');
+    srcCache.set(file, s);
+    return raw == null ? null : s;
+  };
+
+  // ---- Pass A: function-pointer typedefs (cross-file) ----
+  const fnPtrTypedefs = new Set<string>();
+  for (const file of files) {
+    const s = src(file);
+    if (!s || !s.includes('typedef')) continue;
+    FNPTR_TYPEDEF_RE.lastIndex = 0;
+    let m: RegExpExecArray | null;
+    while ((m = FNPTR_TYPEDEF_RE.exec(s))) fnPtrTypedefs.add(m[1]!);
+  }
+
+  // ---- Pass B: struct field layouts ----
+  // structLayout: struct name → ordered fields (with fn-pointer flag).
+  // fieldToStructs: fn-pointer field name → set of struct names that declare it.
+  const structLayout = new Map<string, FieldInfo[]>();
+  const fieldToStructs = new Map<string, Set<string>>();
+  for (const st of ctx.getNodesByKind('struct')) {
+    if (!C_CPP_EXT.test(st.filePath)) continue;
+    const s = srcCache.get(st.filePath) ?? src(st.filePath);
+    if (!s) continue;
+    const body = sliceLines(s, st.startLine, st.endLine);
+    const open = body.indexOf('{');
+    const close = open >= 0 ? matchBrace(body, open) : -1;
+    if (open < 0 || close < 0) continue;
+    const inner = body.slice(open + 1, close);
+    const fields: FieldInfo[] = [];
+    for (const rawDecl of splitTopLevel(inner, ';')) {
+      const decl = rawDecl.trim();
+      if (!decl) continue;
+      // Every data member takes a slot, so positional initializers line up
+      // even when non-fn-pointer members precede the fn-pointer field.
+      for (const { name, isFnPtr } of parseFieldDecl(decl, fnPtrTypedefs)) {
+        fields.push({ name, index: fields.length, isFnPtr });
+        if (isFnPtr) {
+          if (!fieldToStructs.has(name)) fieldToStructs.set(name, new Set());
+          fieldToStructs.get(name)!.add(st.name);
+        }
+      }
+    }
+    if (fields.some((f) => f.isFnPtr)) structLayout.set(st.name, fields);
+  }
+  if (structLayout.size === 0) return [];
+
+  const fnPtrFieldOf = (struct: string, field: string): boolean =>
+    !!structLayout.get(struct)?.some((f) => f.name === field && f.isFnPtr);
+
+  // C/C++ function + method nodes, materialized once (bounded by C/C++ files).
+  const cFns: Node[] = [];
+  for (const fn of iterateFns(queries)) {
+    if (C_CPP_EXT.test(fn.filePath)) cFns.push(fn);
+  }
+
+  // ---- function-name → node resolution (prefer a function in the same file) ----
+  const resolveFn = (name: string, preferFile?: string): Node | null => {
+    const cands = ctx.getNodesByName(name).filter((n) => FN_KINDS.has(n.kind));
+    if (cands.length === 0) return null;
+    if (cands.length === 1) return cands[0]!;
+    if (preferFile) {
+      const same = cands.find((n) => n.filePath === preferFile);
+      if (same) return same;
+    }
+    return cands[0]!;
+  };
+
+  // ---- Pass C: registrations — Map<"struct.field", Set<funcNodeId>> ----
+  const reg = new Map<string, Set<string>>();
+  const idToNode = new Map<string, Node>();
+  const addReg = (struct: string, field: string, fn: Node): void => {
+    const key = `${struct}.${field}`;
+    if (!reg.has(key)) reg.set(key, new Set());
+    reg.get(key)!.add(fn.id);
+    idToNode.set(fn.id, fn);
+  };
+
+  // A struct value `{ … }` (one element) — register its function entries to the
+  // struct's fields, by `.field = fn` designators or by positional slot.
+  const registerStructValue = (struct: string, valueBody: string, file: string): void => {
+    const layout = structLayout.get(struct);
+    if (!layout) return;
+    const items = splitTopLevel(valueBody, ',');
+    let pos = 0;
+    for (const rawItem of items) {
+      const item = rawItem.trim();
+      if (!item) continue;
+      // `.field = value`, optionally with a nested suffix (`.a.b = …`, `.a[0] = …`).
+      const des = item.match(/^\.\s*(\w+)\s*([.[][^=]*)?=\s*([\s\S]*)$/);
+      if (des) {
+        const field = layout.find((f) => f.name === des[1]);
+        if (field) {
+          if (field.isFnPtr && !des[2]) {
+            const id = des[3]!.match(/^&?\s*(\w+)\s*$/);
+            const fn = id ? resolveFn(id[1]!, file) : null;
+            if (fn) addReg(struct, field.name, fn);
+          }
+          // Per C designated-initializer rules, later positional items continue
+          // with the member after the designated one.
+          pos = field.index + 1;
+        }
+        continue;
+      }
+      const field = layout.find((f) => f.index === pos);
+      if (field?.isFnPtr) {
+        const id = item.match(/^&?\s*(\w+)\s*$/);
+        if (id) {
+          const fn = resolveFn(id[1]!, file);
+          if (fn) addReg(struct, field.name, fn);
+        }
+      }
+      pos++;
+    }
+  };
+
+  // `(?:struct )?TYPE name[opt] = {` initializers, where TYPE is a struct that
+  // has ≥1 fn-pointer field. Handles both single (`= {…}`) and array
+  // (`[] = { {…}, {…} }`) forms.
+  const INIT_RE =
+    /(?:^|[;{}])\s*(?:(?:static|const|extern|register|volatile)\s+)*(?:struct\s+)?(\w+)\s+(\w+)\s*(\[[^\]]*\])?\s*=\s*\{/g;
+  for (const file of files) {
+    const s = srcCache.get(file);
+    if (!s || !s.includes('=')) continue;
+    INIT_RE.lastIndex = 0;
+    let m: RegExpExecArray | null;
+    while ((m = INIT_RE.exec(s))) {
+      const struct = m[1]!;
+      if (!structLayout.has(struct)) continue;
+      const isArray = !!m[3];
+      const open = m.index + m[0].length - 1; // points at the `{`
+      const close = matchBrace(s, open);
+      if (close < 0) continue;
+      const body = s.slice(open + 1, close);
+      if (isArray) {
+        // top-level `{ … }` element groups
+        for (const el of splitTopLevel(body, ',')) {
+          // Drop an array index designator (`[CMD_ADD] = { … }`) so the element
+          // is handled like a plain `{ … }` group.
+          const t = el.trim().replace(/^\[[^\]]*\]\s*=\s*/, '');
+          if (t.startsWith('{')) {
+            const e = matchBrace(t, 0);
+            if (e > 0) registerStructValue(struct, t.slice(1, e), file);
+          } else if (t) {
+            // array of bare values (rare for structs) — treat as one positional slot
+            registerStructValue(struct, t, file);
+          }
+        }
+      } else {
+        registerStructValue(struct, body, file);
+      }
+      // Resume at the closing brace so it can serve as the next match's `[;{}]` prefix.
+      INIT_RE.lastIndex = close;
+    }
+  }
+
+  // ---- receiver-type resolution within a function's source ----
+  // `(?:struct )?TYPE [*]recv` declared in the params or body → TYPE (if a known struct).
+  // The per-receiver regex is compiled once and reused; `recv` always comes
+  // from a `\w+` capture, so it needs no escaping.
+  const recvReCache = new Map<string, RegExp>();
+  const recvTypeIn = (fnSrc: string, recv: string): string | null => {
+    let re = recvReCache.get(recv);
+    if (!re) {
+      re = new RegExp(`(?:struct\\s+)?(\\w+)\\s*\\*?\\s*\\b${recv}\\b\\s*(?:[,)=;]|\\[)`, 'g');
+      recvReCache.set(recv, re);
+    }
+    re.lastIndex = 0;
+    let m: RegExpExecArray | null;
+    while ((m = re.exec(fnSrc))) {
+      if (structLayout.has(m[1]!)) return m[1]!;
+    }
+    return null;
+  };
+
+  // ---- Pass D: field←field propagation (`a->f = b->g`) ----
+  // Collected as (targetStruct.field ← sourceStruct.field) pairs, then merged
+  // (at most 3 rounds) so a hook slot inherits a registry field's handlers.
+  const FIELD_ASSIGN_RE = /(\w+)\s*(?:->|\.)\s*(\w+)\s*=\s*(\w+)\s*(?:->|\.)\s*(\w+)/g;
+  const propagations: { to: string; from: string }[] = [];
+  for (const fn of cFns) {
+    const s = srcCache.get(fn.filePath);
+    if (!s) continue;
+    const body = sliceLines(s, fn.startLine, fn.endLine);
+    if (!body.includes('=')) continue;
+    FIELD_ASSIGN_RE.lastIndex = 0;
+    let m: RegExpExecArray | null;
+    while ((m = FIELD_ASSIGN_RE.exec(body))) {
+      const [, lrecv, lfield, rrecv, rfield] = m;
+      const lt = recvTypeIn(body, lrecv!);
+      const rt = recvTypeIn(body, rrecv!);
+      if (lt && rt && fnPtrFieldOf(lt, lfield!) && fnPtrFieldOf(rt, rfield!)) {
+        propagations.push({ to: `${lt}.${lfield}`, from: `${rt}.${rfield}` });
+      }
+    }
+  }
+  for (let pass = 0; pass < 3 && propagations.length; pass++) {
+    let changed = false;
+    for (const { to, from } of propagations) {
+      const fromSet = reg.get(from);
+      if (!fromSet) continue;
+      if (!reg.has(to)) reg.set(to, new Set());
+      const toSet = reg.get(to)!;
+      for (const id of fromSet) {
+        if (!toSet.has(id)) {
+          toSet.add(id);
+          changed = true;
+        }
+      }
+    }
+    if (!changed) break;
+  }
+  if (reg.size === 0) return [];
+
+  // ---- Pass E: dispatch sites → edges ----
+  // recv->field( or recv.field( where field is a known fn-pointer field.
+  const DISPATCH_RE = /(\w+)\s*(?:->|\.)\s*(\w+)\s*\(/g;
+  const edges: Edge[] = [];
+  const seen = new Set<string>();
+  for (const fn of cFns) {
+    const s = srcCache.get(fn.filePath);
+    if (!s) continue;
+    const body = sliceLines(s, fn.startLine, fn.endLine);
+    DISPATCH_RE.lastIndex = 0;
+    let m: RegExpExecArray | null;
+    let added = 0;
+    while ((m = DISPATCH_RE.exec(body)) && added < FANOUT_CAP) {
+      const recv = m[1]!;
+      const field = m[2]!;
+      const owners = fieldToStructs.get(field);
+      if (!owners || owners.size === 0) continue;
+      // Resolve the receiver's struct type; else fall back to a field name that
+      // belongs to exactly one struct.
+      let struct = recvTypeIn(body, recv);
+      if (!struct || !owners.has(struct)) struct = owners.size === 1 ? [...owners][0]! : null;
+      if (!struct) continue;
+      const targets = reg.get(`${struct}.${field}`);
+      if (!targets) continue;
+      // Absolute line of the call site: function start + newlines before the match.
+      const line = fn.startLine + body.slice(0, m.index).split('\n').length - 1;
+      for (const tid of targets) {
+        if (tid === fn.id) continue;
+        const key = `${fn.id}>${tid}`;
+        if (seen.has(key)) continue;
+        seen.add(key);
+        edges.push({
+          source: fn.id,
+          target: tid,
+          kind: 'calls',
+          line,
+          provenance: 'heuristic',
+          metadata: {
+            synthesizedBy: 'fn-pointer-dispatch',
+            via: `${struct}.${field}`,
+            // NOTE(review): despite the key name, this is the dispatch call site
+            // (file:line of `recv->field(…)`), not where the handler was registered.
+            registeredAt: `${fn.filePath}:${line}`,
+          },
+        });
+        if (++added >= FANOUT_CAP) break;
+      }
+    }
+  }
+  return edges;
+}
+
+/**
+ * Streams every `function` node followed by every `method` node from the query
+ * layer, one at a time rather than as an array. No language filtering happens
+ * here — callers narrow to C/C++ by file extension.
+ */
+function* iterateFns(queries: QueryBuilder): IterableIterator<Node> {
+  for (const kind of ['function', 'method'] as const) {
+    yield* queries.iterateNodesByKind(kind);
+  }
+}

+ 3 - 0
src/resolution/callback-synthesizer.ts

@@ -26,6 +26,7 @@ import type { QueryBuilder } from '../db/queries';
 import type { ResolutionContext } from './types';
 import type { ResolutionContext } from './types';
 import { isGeneratedFile } from '../extraction/generated-detection';
 import { isGeneratedFile } from '../extraction/generated-detection';
 import { stripCommentsForRegex } from './strip-comments';
 import { stripCommentsForRegex } from './strip-comments';
+import { cFnPointerDispatchEdges } from './c-fnptr-synthesizer';
 
 
 const REGISTRAR_NAME = /^(on[A-Z]\w*|subscribe|addListener|addEventListener|register|watch|listen|addCallback)$/;
 const REGISTRAR_NAME = /^(on[A-Z]\w*|subscribe|addListener|addEventListener|register|watch|listen|addCallback)$/;
 const DISPATCHER_NAME = /(emit|trigger|notify|dispatch|fire|publish|flush)/i;
 const DISPATCHER_NAME = /(emit|trigger|notify|dispatch|fire|publish|flush)/i;
@@ -2701,6 +2702,7 @@ export function synthesizeCallbackEdges(queries: QueryBuilder, ctx: ResolutionCo
   const mediatrEdges = mediatrDispatchEdges(ctx);
   const mediatrEdges = mediatrDispatchEdges(ctx);
   const sidekiqEdges = sidekiqDispatchEdges(ctx);
   const sidekiqEdges = sidekiqDispatchEdges(ctx);
   const laravelEdges = laravelEventEdges(ctx);
   const laravelEdges = laravelEventEdges(ctx);
+  const cFnPtrEdges = cFnPointerDispatchEdges(queries, ctx);
 
 
   const merged: Edge[] = [];
   const merged: Edge[] = [];
   const seen = new Set<string>();
   const seen = new Set<string>();
@@ -2734,6 +2736,7 @@ export function synthesizeCallbackEdges(queries: QueryBuilder, ctx: ResolutionCo
     ...mediatrEdges,
     ...mediatrEdges,
     ...sidekiqEdges,
     ...sidekiqEdges,
     ...laravelEdges,
     ...laravelEdges,
+    ...cFnPtrEdges,
   ]) {
   ]) {
     const key = `${e.source}>${e.target}`;
     const key = `${e.source}>${e.target}`;
     if (seen.has(key)) continue;
     if (seen.has(key)) continue;

+ 5 - 1
src/resolution/strip-comments.ts

@@ -33,7 +33,9 @@ export type CommentLang =
   | 'csharp'
   | 'csharp'
   | 'swift'
   | 'swift'
   | 'go'
   | 'go'
-  | 'rust';
+  | 'rust'
+  | 'c'
+  | 'cpp';
 
 
 export function stripCommentsForRegex(content: string, lang: CommentLang): string {
 export function stripCommentsForRegex(content: string, lang: CommentLang): string {
   switch (lang) {
   switch (lang) {
@@ -52,6 +54,8 @@ export function stripCommentsForRegex(content: string, lang: CommentLang): strin
     case 'java':
     case 'java':
     case 'csharp':
     case 'csharp':
     case 'swift':
     case 'swift':
+    case 'c':
+    case 'cpp':
       return stripCStyle(content, /* allowSingleQuoteStrings */ lang === 'javascript' || lang === 'typescript');
       return stripCStyle(content, /* allowSingleQuoteStrings */ lang === 'javascript' || lang === 'typescript');
     default:
     default:
       return content;
       return content;