c-fnptr-synthesizer.test.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. /**
  2. * C/C++ function-pointer dispatch synthesis (#932).
  3. *
  4. * C polymorphism is the function pointer: a struct fn-pointer field, registered
  5. * to concrete functions in a table (positional `{"add", cmd_add}` or designated
  6. * `.fn = cmd_add`) or by assignment, then dispatched indirectly (`p->fn(argv)`).
  7. * Static extraction sees neither the registration→field binding nor the
  8. * indirect call, so the dispatcher→handler edge is missing. These tests prove
  9. * the bridge keyed by (struct type, fn-pointer field): the command-table shape,
  10. * designated init, the typedef'd-field + field←field double-hop (the issue's
  11. * own hook_demo.c shape), by-value dispatch, and the precision boundaries
  12. * (a data field is never bridged, distinct fn-pointer fields don't cross-bleed,
  13. * and a non-C project is a no-op). Plus the BARE ARRAY of function pointers
  14. * (no struct, no field) keyed by the array variable name — the opcode-table
  15. * shape `opcodes[op](…)`, the designated + cast-wrapped form with a
  16. * calling-convention typedef, same-named file-local arrays resolving without a
  17. * cross-file leak, and a registered-but-never-dispatched array (the control).
  18. */
  19. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  20. import * as fs from 'node:fs';
  21. import * as path from 'node:path';
  22. import * as os from 'node:os';
  23. import { CodeGraph } from '../src';
  24. describe('c-fnptr dispatch synthesizer', () => {
  // Root of the scratch project; each test gets its own fresh temp directory.
  let dir: string;

  beforeEach(() => {
    const prefix = path.join(os.tmpdir(), 'cfp-');
    dir = fs.mkdtempSync(prefix);
  });

  // Remove the scratch tree after every test, whatever the test wrote into it.
  afterEach(() => {
    fs.rmSync(dir, { recursive: true, force: true });
  });
  /**
   * Writes `body` to the file `rel`, resolved against the scratch `dir`,
   * creating any missing parent directories first.
   */
  const write = (rel: string, body: string) => {
    const target = path.join(dir, rel);
    const parent = path.dirname(target);
    fs.mkdirSync(parent, { recursive: true });
    fs.writeFileSync(target, body);
  };
  /**
   * Indexes the scratch project in `dir` and returns every edge whose metadata
   * marks it as synthesized by 'fn-pointer-dispatch', as rows of
   * (source node name, target node name, `via` metadata value).
   *
   * The graph is closed in a `finally` so a failing index or query does not
   * leave the handle open while `afterEach` removes the directory.
   */
  const load = async () => {
    const cg = await CodeGraph.init(dir, { silent: true });
    try {
      await cg.indexAll();
      // The handle is reached through an `any` cast; presumably it is the raw
      // SQLite connection (the query relies on json_extract) — confirm against CodeGraph.
      const db = (cg as any).db.db;
      const edges: { src: string; tgt: string; via: string }[] = db
        .prepare(
          `SELECT s.name src, t.name tgt, json_extract(e.metadata,'$.via') via
FROM edges e JOIN nodes s ON s.id = e.source JOIN nodes t ON t.id = e.target
WHERE json_extract(e.metadata,'$.synthesizedBy') = 'fn-pointer-dispatch'`
        )
        .all();
      return edges;
    } finally {
      // `close` is called only if the instance provides it, as before.
      cg.close?.();
    }
  };
  /**
   * Reports whether `edges` contains an edge from the node named `src` to the
   * node named `tgt`.
   *
   * @param edges rows to search; only their `src` and `tgt` fields are read
   * @param src   expected source node name
   * @param tgt   expected target node name
   * @returns `true` when at least one row matches both names
   */
  const has = (
    edges: ReadonlyArray<{ src: string; tgt: string }>,
    src: string,
    tgt: string,
  ): boolean => edges.some((edge) => edge.src === src && edge.tgt === tgt);
  it('bridges a {name, fn} command table dispatched through p->fn() (the git shape)', async () => {
    // Positional `{ "name", handler }` rows; run_builtin dispatches via p->fn.
    write('cmd.c', `
struct cmd { const char *name; int (*fn)(int argc); };
static int cmd_add(int argc) { return argc + 1; }
static int cmd_rm(int argc) { return argc - 1; }
static int cmd_noop(int argc) { return argc; } /* defined, NOT in the table */
static struct cmd commands[] = {
{ "add", cmd_add },
{ "rm", cmd_rm },
};
int run_builtin(struct cmd *p, int argc) {
return p->fn(argc);
}
`);

    const synthesized = await load();

    // Every registered handler is reachable from the dispatcher.
    for (const handler of ['cmd_add', 'cmd_rm']) {
      expect(has(synthesized, 'run_builtin', handler)).toBe(true);
    }
    // No edge is attributed to anything other than cmd.fn.
    expect(synthesized.some((edge) => edge.via !== 'cmd.fn')).toBe(false);
    // PRECISION: cmd_noop is defined but never registered, so it is not a target.
    expect(has(synthesized, 'run_builtin', 'cmd_noop')).toBe(false);
  });
  it('bridges designated-init (.handler = fn) and by-value c.fn() dispatch', async () => {
    // The handler is registered with a designated initializer and called on a by-value struct.
    write('ops.c', `
struct ops { int (*handler)(void); int size; };
static int on_open(void) { return 1; }
static struct ops the_ops = { .handler = on_open, .size = 4 };
int dispatch(struct ops o) { return o.handler(); }
`);

    const synthesized = await load();

    const reachesOnOpen = has(synthesized, 'dispatch', 'on_open');
    expect(reachesOnOpen).toBe(true);
    expect(synthesized.some((edge) => edge.via !== 'ops.handler')).toBe(false);
  });
  it('bridges the typedef-field + field←field double-hop (the hook_demo.c shape)', async () => {
    // hooks.func is never registered directly; it is assigned from entry.fn,
    // whose values come from the `registry` table.
    write('hook.c', `
typedef void (*hook_func)(void);
struct hooks { hook_func func; };
struct entry { const char *name; hook_func fn; };
static void hk_set(void) {}
static void hk_get(void) {}
static const struct entry registry[] = {
{ "set", hk_set },
{ "get", hk_get },
};
void call(struct hooks *h, const struct entry *found) {
h->func = found->fn; /* generic slot reassigned from the registry */
h->func(); /* dispatch through hooks.func */
}
`);

    const synthesized = await load();

    // The dispatch through hooks.func must inherit entry.fn's registrations.
    for (const hook of ['hk_set', 'hk_get']) {
      expect(has(synthesized, 'call', hook)).toBe(true);
    }
  });
  it('keys by (struct, field): distinct fn-pointer fields do not cross-bleed', async () => {
    // Two fn-pointer fields on one struct; only `read` is ever dispatched.
    write('vtable.c', `
struct io { int (*read)(void); int (*write)(int); };
static int do_read(void) { return 0; }
static int do_write(int x) { return x; }
static struct io io = { .read = do_read, .write = do_write };
int only_reads(struct io *p) { return p->read(); }
`);

    const synthesized = await load();

    const reachesRead = has(synthesized, 'only_reads', 'do_read');
    const reachesWrite = has(synthesized, 'only_reads', 'do_write');
    // p->read() resolves to do_read and must not pick up the `write` field's handler.
    expect(reachesRead).toBe(true);
    expect(reachesWrite).toBe(false);
  });
  it('does not bridge a plain data field, and no-ops on a struct with no dispatch', async () => {
    // `fn` is registered, but the only access is a read of the data field `count`.
    write('data.c', `
struct box { int count; int (*fn)(void); };
static int helper(void) { return 0; }
static struct box b = { .count = 3, .fn = helper };
/* reads a data field and never dispatches the fn pointer */
int total(struct box *x) { return x->count + 1; }
`);

    const synthesized = await load();

    // Without an indirect call there is nothing to bridge.
    expect(synthesized).toHaveLength(0);
  });
  it('is a no-op on a project with no C/C++ (clean control)', async () => {
    // A JavaScript-only project: the dispatch-through-a-table shape exists,
    // but no C/C++ source does.
    write('app.js', `
const handlers = { add: (x) => x + 1, rm: (x) => x - 1 };
function run(name, x) { return handlers[name](x); }
`);

    const synthesized = await load();

    expect(synthesized).toHaveLength(0);
  });
  // Minimized redis command-table shape. Four things combine here: the handler
  // sits inside a function-like macro, the table's struct type is spelled via
  // an object-like macro alias, the fn-pointer field is declared with a
  // function-TYPE typedef, and the call receiver is a chained field access
  // that goes through a multi-declarator field.
  it('bridges a macro-built table with a typedef field, type-alias macro, and chained dispatch', async () => {
    write('reg.h', `
typedef void cmdProc(int x); /* function-TYPE typedef, not (*name) */
struct command { const char *name; cmdProc *proc; };
struct context { int id; struct command *cmd, *last; }; /* multi-declarator field */
`);
    write('reg.c', `
#include "reg.h"
#define ENTRY(nm, handler) nm, handler /* function-like macro wrapping the handler */
#define CMD_T command /* object-like macro: the struct-type alias */
static void getCmd(int x) {}
static void setCmd(int x) {}
static void unusedCmd(int x) {} /* defined, NOT in the table */
static struct CMD_T table[] = {
{ ENTRY("get", getCmd) },
{ ENTRY("set", setCmd) },
};
void run(struct context *ctx, int x) { ctx->cmd->proc(x); } /* context.cmd → command → proc */
`);

    const synthesized = await load();

    for (const handler of ['getCmd', 'setCmd']) {
      expect(has(synthesized, 'run', handler)).toBe(true);
    }
    expect(synthesized.some((edge) => edge.via !== 'command.proc')).toBe(false);
    // PRECISION: unusedCmd never appears in the table, so it is never a target.
    expect(has(synthesized, 'run', 'unusedCmd')).toBe(false);
  });
  // redis emits its command table into a `.def` file that is only ever
  // #included, never indexed by itself. The synthesizer is expected to read
  // the included file with the includer's macros in scope, so the table
  // still resolves.
  it('reads a macro-built table from a non-indexed #included file', async () => {
    write('inc.h', `
typedef int opRun(void);
struct op { const char *name; opRun *run; };
`);
    write('inc.c', `
#include "inc.h"
#define MK(nm, fn) nm, fn
#define CMD_T op
static int a_impl(void){return 0;}
static int b_impl(void){return 0;}
#include "ops.def"
int go(struct op *o) { return o->run(); }
`);
    // `.def` is not a C source extension, so the file below is never indexed;
    // the synthesizer can only see it by following inc.c's #include.
    write('ops.def', `
static struct CMD_T optable[] = {
{ MK("a", a_impl) },
{ MK("b", b_impl) },
};
`);

    const synthesized = await load();

    for (const impl of ['a_impl', 'b_impl']) {
      expect(has(synthesized, 'go', impl)).toBe(true);
    }
    expect(synthesized.some((edge) => edge.via !== 'op.run')).toBe(false);
  });
  // sqlite's builtin-function-table shape: the macro that builds a table row
  // is defined in a header (`sqliteInt.h` there), apart from the file holding
  // the table (`func.c`), and it expands to an entire brace-wrapped struct
  // element `{ …, xFunc, … }`.
  it('expands a header-defined macro that produces a brace-wrapped element', async () => {
    write('fn.h', `
typedef void sqlFn(int *ctx);
struct FuncDef { int nArg; sqlFn *xFunc; const char *zName; };
#define MKFUNC(name, impl) { 1, impl, #name }
`);
    write('fn.c', `
#include "fn.h"
static void absImpl(int *ctx) {}
static void lenImpl(int *ctx) {}
static struct FuncDef builtins[] = {
MKFUNC(abs, absImpl),
MKFUNC(len, lenImpl),
};
void invoke(struct FuncDef *p, int *x) { p->xFunc(x); }
`);

    const synthesized = await load();

    for (const impl of ['absImpl', 'lenImpl']) {
      expect(has(synthesized, 'invoke', impl)).toBe(true);
    }
    expect(synthesized.some((edge) => edge.via !== 'FuncDef.xFunc')).toBe(false);
  });
  // vim's command-table shape: both the row-building macro and the struct live
  // behind `#ifdef`; the struct is declared INLINE with the array
  // (`struct cmd_entry {…} table[]`) in a header that a `.c` file #includes
  // after defining the switch macro; and the call is a parenthesized array
  // subscript on the file-scope table (`(cmd_table[i].handler)(x)`). This
  // covers #ifdef evaluation, the conditionally redefined macro, the inline
  // struct (never a node), and array/global dispatch.
  it('bridges an #ifdef-guarded inline-struct table dispatched by array subscript', async () => {
    write('cmds.h', `
#ifdef DECLARE_TABLE
# define CMD(id, name, fn) { name, fn }
typedef void (*cmd_fn)(int arg);
static struct cmd_entry { const char *cmd_name; cmd_fn handler; } cmd_table[] =
#else
# define CMD(id, name, fn) id
enum cmd_id
#endif
{
CMD(C_a, "a", do_a),
CMD(C_b, "b", do_b),
};
`);
    write('main.c', `
#define DECLARE_TABLE
#include "cmds.h"
static void do_a(int arg) {}
static void do_b(int arg) {}
static void unused(int arg) {} /* defined, NOT in the table */
void run(int idx, int x) { (cmd_table[idx].handler)(x); }
`);

    const synthesized = await load();

    for (const handler of ['do_a', 'do_b']) {
      expect(has(synthesized, 'run', handler)).toBe(true);
    }
    expect(synthesized.some((edge) => edge.via !== 'cmd_entry.handler')).toBe(false);
    // `unused` is defined in main.c but is not a row of the table.
    expect(has(synthesized, 'run', 'unused')).toBe(false);
  });
  // A bare ARRAY of function pointers, with no struct and no field involved.
  // The element type is a function-TYPE typedef (`op_t *opcodes[]`), the
  // entries are plain function names, and the call is subscript-then-call
  // `opcodes[op](…)` (SameBoy's CPU opcode-table shape). The key is the array
  // variable's name.
  it('bridges a bare array of function pointers dispatched by subscript (the opcode-table shape)', async () => {
    write('cpu.c', `
typedef void op_t(int *vm, unsigned char opcode);
static void nop(int *vm, unsigned char opcode) {}
static void inc(int *vm, unsigned char opcode) {}
static void unreg(int *vm, unsigned char opcode) {} /* defined, NOT in the table */
static op_t *opcodes[256] = { nop, inc };
void cpu_run(int *vm) {
unsigned char opcode = 0;
opcodes[opcode](vm, opcode);
}
`);

    const synthesized = await load();

    for (const op of ['nop', 'inc']) {
      expect(has(synthesized, 'cpu_run', op)).toBe(true);
    }
    expect(synthesized.some((edge) => edge.via !== 'opcodes[]')).toBe(false);
    // PRECISION: `unreg` is not an element of the array, so it is never a target.
    expect(has(synthesized, 'cpu_run', 'unreg')).toBe(false);
  });
  // php's Zend shape: a function-POINTER typedef whose declarator has a
  // calling-convention macro ahead of the `*` (`(FASTCALL *dtor_t)`); an array
  // of that type initialized by DESIGNATED index with CAST-wrapped entries
  // (`[1] = (dtor_t)fn`); and a dispatch whose subscript is itself a call
  // (`t[type(p)](p)`).
  it('bridges a designated + cast-wrapped array with a calling-convention typedef (the Zend dtor shape)', async () => {
    write('rc.c', `
#define FASTCALL
typedef void (FASTCALL *dtor_t)(int *p);
static void empty_dtor(int *p) {}
static void str_dtor(int *p) {}
static void arr_dtor(int *p) {}
static int type_of(int *p) { return 0; }
static const dtor_t rc_dtor[] = {
[0] = (dtor_t)empty_dtor,
[1] = (dtor_t)str_dtor,
[2] = (dtor_t)arr_dtor,
};
void rc_free(int *p) { rc_dtor[type_of(p)](p); }
`);

    const synthesized = await load();

    for (const dtor of ['empty_dtor', 'str_dtor', 'arr_dtor']) {
      expect(has(synthesized, 'rc_free', dtor)).toBe(true);
    }
    expect(synthesized.some((edge) => edge.via !== 'rc_dtor[]')).toBe(false);
  });
  // Two file-local `static` arrays share one name across files (SameBoy
  // declares `opcodes[256]` in both the CPU and the disassembler). Each
  // dispatch must resolve to the table in its OWN file — no cross-file leak.
  // Both files are checked symmetrically: every handler of the local table is
  // reached, and no handler of the other file's table is.
  it('resolves same-named file-local arrays to their own file (no cross-file leak)', async () => {
    write('a.c', `
typedef void af_t(int *m);
static void a_one(int *m) {}
static void a_two(int *m) {}
static af_t *table[8] = { a_one, a_two };
void a_run(int *m, int i) { table[i](m); }
`);
    write('b.c', `
typedef void bf_t(int *m);
static void b_one(int *m) {}
static void b_two(int *m) {}
static bf_t *table[8] = { b_one, b_two };
void b_run(int *m, int i) { table[i](m); }
`);

    const edges = await load();

    const aHandlers = ['a_one', 'a_two'];
    const bHandlers = ['b_one', 'b_two'];

    // Each dispatcher reaches every handler registered in its own file's table.
    for (const handler of aHandlers) {
      expect(has(edges, 'a_run', handler)).toBe(true);
    }
    for (const handler of bHandlers) {
      expect(has(edges, 'b_run', handler)).toBe(true);
    }

    // PRECISION: a_run's `table` is a.c's, never b.c's (and vice versa).
    for (const handler of bHandlers) {
      expect(has(edges, 'a_run', handler)).toBe(false);
    }
    for (const handler of aHandlers) {
      expect(has(edges, 'b_run', handler)).toBe(false);
    }
  });
  // PRECISION: a fn-pointer array whose elements are only handed to a
  // registrar, and never called from C as `arr[i](…)`, must produce no edges.
  // This is lua's `package.searchers` shape, where the elements are pushed
  // into the VM.
  it('does not bridge a fn-pointer array that is registered, not dispatched (the searchers control)', async () => {
    write('pkg.c', `
typedef int searcher_t(int *L);
static int s_preload(int *L) { return 0; }
static int s_lua(int *L) { return 0; }
static searcher_t *searchers[] = { s_preload, s_lua, 0 };
extern void register_one(int *L, searcher_t *s);
void setup(int *L) {
for (int i = 0; searchers[i]; i++) register_one(L, searchers[i]);
}
`);

    const synthesized = await load();

    expect(synthesized).toHaveLength(0);
  });
  344. });