1
0

verify-extraction.mjs 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. #!/usr/bin/env node
  2. // Sanity-check that codegraph extracted REAL symbols (not just file/import nodes)
  3. // from a repo for a given language. Exits non-zero on a critical failure so it
  4. // can drive a write-extractor -> build -> re-check loop.
  5. //
  6. // Usage: node scripts/add-lang/verify-extraction.mjs <repo-path> <lang>
  7. // Reads `codegraph status <repo> --json` using whatever codegraph is on PATH,
  8. // so it reflects the binary that built the index.
  9. //
  10. // Exit codes: 0 = pass or soft-warn, 1 = critical fail, 2 = could not run.
  11. import { execFileSync } from 'node:child_process';
  12. const [repo, lang] = process.argv.slice(2);
  13. if (!repo || !lang) {
  14. console.error('usage: verify-extraction.mjs <repo-path> <lang>');
  15. process.exit(2);
  16. }
  17. let status;
  18. try {
  19. const out = execFileSync('codegraph', ['status', repo, '--json'], { encoding: 'utf8' });
  20. status = JSON.parse(out);
  21. } catch (e) {
  22. console.error(`[verify] could not read codegraph status for ${repo}: ${e.message}`);
  23. process.exit(2);
  24. }
  25. // Kinds that prove the extractor mapped AST node types (everything except
  26. // 'file' and 'import', which codegraph creates structurally for any language).
  27. const SYMBOL_KINDS = new Set([
  28. 'module', 'class', 'struct', 'interface', 'trait', 'protocol', 'function',
  29. 'method', 'property', 'field', 'variable', 'constant', 'enum', 'enum_member',
  30. 'type_alias', 'namespace', 'route', 'component',
  31. ]);
  32. const byKind = status.nodesByKind || {};
  33. const langs = status.languages || [];
  34. const files = status.fileCount || 0;
  35. const edges = status.edgeCount || 0;
  36. const symbolKinds = Object.keys(byKind).filter((k) => SYMBOL_KINDS.has(k));
  37. const symbolCount = symbolKinds.reduce((s, k) => s + byKind[k], 0);
  38. const checks = [];
  39. const add = (severity, ok, label, detail) => checks.push({ severity, ok, label, detail });
  40. add('critical', status.initialized === true, 'index initialized', `initialized=${status.initialized}`);
  41. add('critical', langs.includes(lang), `language "${lang}" detected`, `languages=[${langs.join(', ')}]`);
  42. add('critical', symbolCount > 0, 'structural symbols extracted', `${symbolCount} symbols (${symbolKinds.join(', ') || 'NONE — only file/import nodes!'})`);
  43. add('soft', symbolCount >= files, 'symbol density >= 1/file', `${symbolCount} symbols across ${files} files`);
  44. add('soft', edges > files, 'edges resolved', `${edges} edges across ${files} files`);
  45. console.log(`\n# Extraction check — ${repo} (lang=${lang}, backend=${status.backend})`);
  46. console.log(` files=${files} nodes=${status.nodeCount} edges=${edges}`);
  47. console.log(` nodesByKind: ${JSON.stringify(byKind)}\n`);
  48. for (const c of checks) console.log(` ${c.ok ? '✓' : '✗'} ${c.label} — ${c.detail}`);
  49. const critical = checks.filter((c) => !c.ok && c.severity === 'critical');
  50. const soft = checks.filter((c) => !c.ok && c.severity === 'soft');
  51. console.log();
  52. if (critical.length) {
  53. console.log(`RESULT: FAIL (${critical.length} critical) — extractor or grammar wiring is broken. Re-run dump-ast.mjs and fix the node-type mappings.`);
  54. process.exit(1);
  55. }
  56. if (soft.length) {
  57. console.log(`RESULT: WARN (${soft.length} soft) — extraction works but looks thin; inspect the counts above.`);
  58. process.exit(0);
  59. }
  60. console.log('RESULT: PASS — extraction looks healthy.');
  61. process.exit(0);