1
0

ab-adoption.sh 4.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. #!/usr/bin/env bash
  2. # Does the agent PICK codegraph_node to read a file, vs the built-in Read tool?
  3. # Build A/B: NEW build (HEAD, codegraph_node has Read parity) vs BASELINE build
  4. # (a ref where it doesn't), BOTH codegraph-attached + pre-warmed, same task. The
  5. # metric is tool CHOICE: Read calls vs codegraph_node[file] calls per run.
  6. #
  7. # Usage: ab-adoption.sh <indexed-repo> "<task>" [runs-per-arm] [baseline-ref]
  8. # Env: AGENT_EVAL_OUT (default: /tmp/ab-adoption)
  # Unset variables and failed pipeline stages are errors. `-e` is NOT enabled,
  # so every later command has to check the exit statuses it cares about.
  set -uo pipefail

  # Positional arguments (see the usage line in the header).
  TARGET="${1:?usage: ab-adoption.sh <indexed-repo> \"<task>\" [runs] [baseline-ref]}"
  TASK="${2:?task required}"
  RUNS="${3:-2}"
  BASE_REF="${4:-HEAD~1}"

  # Engine checkout: two directories above the directory holding this script.
  ENGINE="$(cd "$(dirname "$0")/../.." && pwd)"
  BIN="$ENGINE/dist/bin/codegraph.js"
  OUT="${AGENT_EVAL_OUT:-/tmp/ab-adoption}"

  # A non-numeric run count would make the per-arm loop run zero times and the
  # script would "succeed" without measuring anything.
  case "$RUNS" in
    '' | *[!0-9]*)
      echo "runs-per-arm must be a non-negative integer, got: $RUNS" >&2
      exit 1
      ;;
  esac

  command -v claude >/dev/null || { echo "claude CLI not on PATH" >&2; exit 1; }
  [ -d "$TARGET/.codegraph" ] || { echo "target not indexed: run 'codegraph init $TARGET' first" >&2; exit 1; }

  # Source files are swapped in place for the baseline arm, so both the
  # working tree and the index must be clean before starting.
  if ! git -C "$ENGINE" diff --quiet || ! git -C "$ENGINE" diff --cached --quiet; then
    echo "engine has uncommitted changes — commit/stash first" >&2
    exit 1
  fi

  # Reject an unknown baseline ref up front; otherwise the diff below fails
  # and the problem is misreported as "no src/ changes".
  git -C "$ENGINE" rev-parse --verify --quiet "$BASE_REF^{commit}" >/dev/null \
    || { echo "baseline ref not found in $ENGINE: $BASE_REF" >&2; exit 1; }

  # Newline-separated list of src/ paths that differ between baseline and HEAD.
  CHANGED=$(git -C "$ENGINE" diff --name-only "$BASE_REF" HEAD -- src)
  [ -n "$CHANGED" ] || { echo "no src/ changes between $BASE_REF and HEAD" >&2; exit 1; }
  # cleanup — EXIT handler.
  # Kills any `serve --mcp` process whose --path lies under $OUT, puts every
  # path listed in $CHANGED back to its HEAD state, and rebuilds the engine.
  # Globals read: OUT, ENGINE, CHANGED (newline-separated paths).
  #
  # Paths are restored one at a time. A single `git checkout HEAD -- <all paths>`
  # is rejected as a whole ("pathspec did not match") when any listed path does
  # not exist in HEAD, which would leave the engine on the baseline sources.
  cleanup() {
    local path
    pkill -9 -f "serve --mcp --path $OUT/" 2>/dev/null
    while IFS= read -r path; do
      [ -n "$path" ] || continue
      if git -C "$ENGINE" cat-file -e "HEAD:$path" 2>/dev/null; then
        git -C "$ENGINE" checkout HEAD -- "$path" 2>/dev/null \
          || echo "cleanup: could not restore $path to HEAD" >&2
      else
        # Path exists only at the baseline ref: drop it from index and worktree.
        # --ignore-unmatch keeps this a no-op if the baseline was never checked out.
        git -C "$ENGINE" rm -q -f --ignore-unmatch -- "$path" 2>/dev/null \
          || echo "cleanup: could not remove baseline-only file $path" >&2
      fi
    done <<< "$CHANGED"
    ( cd "$ENGINE" && npm run build >/dev/null 2>&1 ) \
      || echo "cleanup: rebuild failed — $ENGINE/dist may not match HEAD" >&2
  }
  trap cleanup EXIT
  # Create the output directory and print the run header: target, runs per
  # arm, baseline ref, the changed paths flattened onto one line, and the task.
  mkdir -p "$OUT"
  printf '###### target=%s runs/arm=%s baseline=%s\n' "$TARGET" "$RUNS" "$BASE_REF"
  printf '###### changed: %s\n' "$(printf '%s\n' "$CHANGED" | tr '\n' ' ')"
  printf '###### task=%s\n\n' "$TASK"
  # prewarm <repo-path>
  # Kills any `serve --mcp` process already running for <repo-path>, starts a
  # fresh one in the background, then waits for <repo-path>/.codegraph/daemon.sock
  # to appear.
  # Globals read: BIN.
  # Returns: 0 once the socket exists; otherwise the poller's non-zero status,
  #          after printing a warning to stderr (previously the failure was silent).
  prewarm() {
    local repo="$1"
    local rc=0
    pkill -9 -f "serve --mcp --path $repo" 2>/dev/null
    # 1800000 ms = 30 min; presumably keeps the daemon from idling out between
    # runs — confirm against the daemon's handling of this variable.
    CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 node "$BIN" serve --mcp --path "$repo" </dev/null >/dev/null 2>&1 &
    # Poll every 100 ms for the socket file; exit 1 after roughly 15 s.
    # NOTE(review): after SIGKILL a previous daemon cannot unlink its socket, so
    # a stale file could satisfy this check early — confirm whether that can occur.
    node -e 'const fs=require("fs");let n=0;const t=setInterval(()=>{if(fs.existsSync(process.argv[1]+"/.codegraph/daemon.sock")){clearInterval(t);process.exit(0)}if(n++>150){clearInterval(t);process.exit(1)}},100)' "$repo" >/dev/null 2>&1 || rc=$?
    if [ "$rc" -ne 0 ]; then
      echo "prewarm: wait for $repo/.codegraph/daemon.sock failed (exit $rc) — daemon may not be warm" >&2
    fi
    return "$rc"
  }
  # count <run.jsonl>
  # Tallies tool choice for one run from its stream-json log and prints a single
  # summary line: Read calls, codegraph_node calls split by whether the input
  # carries a `symbol`, other mcp__codegraph__ tools, and how many tool names
  # containing "codegraph" the init event listed ("?" if there was no init event).
  count() {
    local jsonl="$1"
    node -e '
      const { readFileSync } = require("fs");
      const tally = { read: 0, cgFile: 0, cgSym: 0, cgOther: 0 };
      let exposed = "?";

      // Map one tool_use block to the counter it belongs to (null = not counted).
      const bucketOf = (b) => {
        if (b.name === "Read") return "read";
        if (b.name === "mcp__codegraph__codegraph_node") {
          return b.input && b.input.symbol ? "cgSym" : "cgFile";
        }
        return /mcp__codegraph__/.test(b.name) ? "cgOther" : null;
      };

      for (const raw of readFileSync(process.argv[1], "utf8").split("\n")) {
        if (!raw) continue;
        try {
          const evt = JSON.parse(raw);
          if (evt.type === "system" && evt.subtype === "init") {
            exposed = (evt.tools || []).filter((t) => /codegraph/.test(t)).length;
          }
          const content = evt.message?.content || [];
          if (!Array.isArray(content)) continue;
          for (const b of content) {
            if (b.type !== "tool_use") continue;
            const bucket = bucketOf(b);
            if (bucket) tally[bucket] += 1;
          }
        } catch {
          // Lines that are not valid JSON (or not event-shaped) are skipped.
        }
      }

      console.log(` Read=${tally.read} codegraph_node[file]=${tally.cgFile} codegraph_node[symbol]=${tally.cgSym} other_cg=${tally.cgOther} (cg exposed=${exposed})`);
    ' "$jsonl"
  }
  # run_arm <label> <n>
  # Runs the task <n> times against whatever build currently sits at $BIN.
  # Each run gets a fresh copy of $TARGET under $OUT/t-<label>-<i>, a new
  # codegraph index, an MCP config pointing at that copy, and a pre-warmed
  # server; the agent's stream-json output lands in $OUT/run-<label>-<i>.jsonl
  # (stderr in the matching .err) and is summarised by `count`.
  # Globals read: OUT, TARGET, BIN, TASK, and optionally MODEL / EFFORT.
  # <n> must be a plain decimal integer.
  #
  # rsync / init / claude failures do not stop the arm, but each is reported on
  # stderr so a broken run is not mistaken for a real measurement.
  run_arm() { # label, N
    local label="$1" n="$2"
    local c="$OUT/mcp-$label.json"
    local i tgt # kept local so the loop does not clobber a caller's variables
    for ((i = 1; i <= n; i++)); do
      tgt="$OUT/t-$label-$i"
      rm -rf -- "${tgt:?}"
      rsync -a --exclude node_modules --exclude .git --exclude dist --exclude .codegraph "$TARGET/" "$tgt/" \
        || echo "[$label] run $i: rsync from $TARGET failed — this run's numbers are suspect" >&2
      node "$BIN" init "$tgt" >/dev/null 2>&1 \
        || echo "[$label] run $i: codegraph init failed — the agent may have no index to query" >&2
      printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","node","%s","serve","--mcp","--path","%s"]}}}' "$BIN" "$tgt" > "$c"
      prewarm "$tgt"
      echo "----- [$label] run $i -----"
      ( cd "$tgt" && claude -p "$TASK" \
          --output-format stream-json --verbose --permission-mode bypassPermissions \
          --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" \
          </dev/null > "$OUT/run-$label-$i.jsonl" 2>"$OUT/run-$label-$i.err" ) \
        || echo "[$label] run $i: claude exited non-zero — see $OUT/run-$label-$i.err" >&2
      count "$OUT/run-$label-$i.jsonl"
      pkill -9 -f "serve --mcp --path $tgt" 2>/dev/null
    done
    echo
  }
  # build_engine <what>
  # Rebuilds $ENGINE/dist from the current working tree. Aborts the script on
  # failure: continuing would run the arm against whatever dist/ was left by
  # the previous build and silently invalidate the comparison.
  build_engine() {
    ( cd "$ENGINE" && npm run build >/dev/null 2>&1 ) || {
      echo "build failed for $1 — aborting, the arm would run against a stale dist/" >&2
      exit 1
    }
    echo "built"
  }

  echo "== NEW build (HEAD: codegraph_node has Read parity) =="
  build_engine "HEAD"
  run_arm new "$RUNS"

  echo "== BASELINE build ($BASE_REF) =="
  # Swap each changed path to its baseline state, one path at a time. A single
  # `git checkout <ref> -- <all paths>` is rejected as a whole when any path
  # does not exist at <ref> (e.g. a file added since the baseline), which would
  # leave HEAD's sources in place and make both arms test the same build.
  while IFS= read -r changed_path; do
    [ -n "$changed_path" ] || continue
    if git -C "$ENGINE" cat-file -e "$BASE_REF:$changed_path" 2>/dev/null; then
      git -C "$ENGINE" checkout "$BASE_REF" -- "$changed_path" \
        || { echo "could not check out $changed_path from $BASE_REF" >&2; exit 1; }
    else
      # Not present at the baseline: remove it so the baseline build cannot see it.
      git -C "$ENGINE" rm -q -f -- "$changed_path" \
        || { echo "could not remove $changed_path (absent at $BASE_REF)" >&2; exit 1; }
    fi
  done <<< "$CHANGED"
  build_engine "$BASE_REF"
  run_arm baseline "$RUNS"

  echo "###### DONE — compare [new] vs [baseline]: does codegraph_node[file] rise / Read fall? Logs: $OUT"