run-all.sh 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. #!/usr/bin/env bash
# A/B eval (with vs. without codegraph, plus an optional interactive arm) for a
# codegraph version on a repo. Codegraph is the ONLY variable: both arms launch
# claude with --strict-mcp-config — "with" = codegraph-only MCP (pointed at
# $CG_BIN), "without" = empty MCP. Built-in Read/Grep/Bash stay available in
# both arms.
#
# Usage: run-all.sh <repo-path> "<question>" [headless|tmux|all]
# Env:   CG_BIN          codegraph binary (default: command -v codegraph)
#        AGENT_EVAL_OUT  output dir (default: /tmp/agent-eval)
  10. set -uo pipefail
  11. REPO="${1:?usage: run-all.sh <repo-path> \"<question>\" [headless|tmux|all]}"
  12. Q="${2:?question required}"
  13. MODE="${3:-headless}"
  14. CG_BIN="${CG_BIN:-$(command -v codegraph)}"
  15. OUT="${AGENT_EVAL_OUT:-/tmp/agent-eval}"
  16. HARNESS="$(cd "$(dirname "$0")" && pwd)"
  17. mkdir -p "$OUT"
  18. [ -n "$CG_BIN" ] || { echo "no codegraph binary on PATH (set CG_BIN)"; exit 1; }
  19. [ -d "$REPO/.codegraph" ] || { echo "no .codegraph index at $REPO — index it first"; exit 1; }
  20. case "$MODE" in headless|tmux|all) ;; *) echo "mode must be headless|tmux|all (got '$MODE')"; exit 1;; esac
  21. # MCP config files (path form avoids inline-JSON quoting through tmux).
  22. cat > "$OUT/mcp-codegraph.json" <<JSON
  23. {"mcpServers":{"codegraph":{"command":"$CG_BIN","args":["serve","--mcp","--path","$REPO"]}}}
  24. JSON
  25. echo '{"mcpServers":{}}' > "$OUT/mcp-empty.json"
  26. echo "###### codegraph: $CG_BIN"
  27. echo "###### repo: $REPO"
  28. echo "###### question: $Q"
  29. echo
  30. # Headless arm: claude -p with stream-json -> exact tool sequence + tokens/cost.
  31. headless() {
  32. local label="$1" cfg="$2"
  33. echo "############################## HEADLESS [$label] ##############################"
  34. ( cd "$REPO" && claude -p "$Q" \
  35. --output-format stream-json --verbose \
  36. --permission-mode bypassPermissions \
  37. --model opus \
  38. --max-budget-usd 4 \
  39. --strict-mcp-config --mcp-config "$cfg" \
  40. > "$OUT/run-$label.jsonl" 2>"$OUT/run-$label.err" )
  41. echo "exit $? -> $OUT/run-$label.jsonl ($(wc -l < "$OUT/run-$label.jsonl" | tr -d ' ') lines)"
  42. tail -2 "$OUT/run-$label.err" 2>/dev/null
  43. node "$HARNESS/parse-run.mjs" "$OUT/run-$label.jsonl" 2>&1 || true
  44. echo
  45. }
  46. if [ "$MODE" = headless ] || [ "$MODE" = all ]; then
  47. headless "headless-with" "$OUT/mcp-codegraph.json"
  48. headless "headless-without" "$OUT/mcp-empty.json"
  49. fi
  50. if [ "$MODE" = tmux ] || [ "$MODE" = all ]; then
  51. echo "############################## INTERACTIVE [with] ##############################"
  52. CLAUDE_EXTRA_ARGS="--model opus --strict-mcp-config --mcp-config $OUT/mcp-codegraph.json" \
  53. bash "$HARNESS/itrun.sh" "$REPO" "int-with" "$Q" 2>&1 || echo "[itrun WITH failed]"
  54. echo
  55. echo "############################## INTERACTIVE [without] ##############################"
  56. CLAUDE_EXTRA_ARGS="--model opus --strict-mcp-config --mcp-config $OUT/mcp-empty.json" \
  57. bash "$HARNESS/itrun.sh" "$REPO" "int-without" "$Q" 2>&1 || echo "[itrun WITHOUT failed]"
  58. echo
  59. fi
  60. echo "############################## RUN-ALL COMPLETE ##############################"