1
0

run-all.sh 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. #!/usr/bin/env bash
  2. # With/without A/B (and optional interactive) eval for a codegraph version on a
  3. # repo. Codegraph is the ONLY variable: both arms launch claude with
  4. # --strict-mcp-config — with = codegraph-only MCP (pointed at $CG_BIN),
  5. # without = empty MCP. Built-in Read/Grep/Bash stay available in both arms.
  6. #
  7. # Usage: run-all.sh <repo-path> "<question>" [headless|tmux|all]
  8. # Env: CG_BIN codegraph binary (default: command -v codegraph)
  9. # AGENT_EVAL_OUT output dir (default: /tmp/agent-eval)
  10. # MODEL / EFFORT claude model/effort (default: sonnet / high — the
  11. # standing A/B policy; see CLAUDE.md, don't raise)
  12. set -uo pipefail
  13. REPO="${1:?usage: run-all.sh <repo-path> \"<question>\" [headless|tmux|all]}"
  14. Q="${2:?question required}"
  15. MODE="${3:-headless}"
  16. CG_BIN="${CG_BIN:-$(command -v codegraph)}"
  17. OUT="${AGENT_EVAL_OUT:-/tmp/agent-eval}"
  18. HARNESS="$(cd "$(dirname "$0")" && pwd)"
  19. mkdir -p "$OUT"
  20. [ -n "$CG_BIN" ] || { echo "no codegraph binary on PATH (set CG_BIN)"; exit 1; }
  21. [ -d "$REPO/.codegraph" ] || { echo "no .codegraph index at $REPO — index it first"; exit 1; }
  22. case "$MODE" in headless|tmux|all) ;; *) echo "mode must be headless|tmux|all (got '$MODE')"; exit 1;; esac
  23. # MCP config files (path form avoids inline-JSON quoting through tmux).
  24. cat > "$OUT/mcp-codegraph.json" <<JSON
  25. {"mcpServers":{"codegraph":{"command":"$CG_BIN","args":["serve","--mcp","--path","$REPO"]}}}
  26. JSON
  27. echo '{"mcpServers":{}}' > "$OUT/mcp-empty.json"
  28. echo "###### codegraph: $CG_BIN"
  29. echo "###### repo: $REPO"
  30. echo "###### question: $Q"
  31. echo
  32. # Headless arm: claude -p with stream-json -> exact tool sequence + tokens/cost.
  33. headless() {
  34. local label="$1" cfg="$2"
  35. echo "############################## HEADLESS [$label] ##############################"
  36. ( cd "$REPO" && claude -p "$Q" \
  37. --output-format stream-json --verbose \
  38. --permission-mode bypassPermissions \
  39. --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" \
  40. --max-budget-usd 4 \
  41. --strict-mcp-config --mcp-config "$cfg" \
  42. > "$OUT/run-$label.jsonl" 2>"$OUT/run-$label.err" )
  43. echo "exit $? -> $OUT/run-$label.jsonl ($(wc -l < "$OUT/run-$label.jsonl" | tr -d ' ') lines)"
  44. tail -2 "$OUT/run-$label.err" 2>/dev/null
  45. node "$HARNESS/parse-run.mjs" "$OUT/run-$label.jsonl" 2>&1 || true
  46. echo
  47. }
  48. if [ "$MODE" = headless ] || [ "$MODE" = all ]; then
  49. headless "headless-with" "$OUT/mcp-codegraph.json"
  50. headless "headless-without" "$OUT/mcp-empty.json"
  51. fi
  52. if [ "$MODE" = tmux ] || [ "$MODE" = all ]; then
  53. echo "############################## INTERACTIVE [with] ##############################"
  54. CLAUDE_EXTRA_ARGS="--model ${MODEL:-sonnet} --effort ${EFFORT:-high} --strict-mcp-config --mcp-config $OUT/mcp-codegraph.json" \
  55. bash "$HARNESS/itrun.sh" "$REPO" "int-with" "$Q" 2>&1 || echo "[itrun WITH failed]"
  56. echo
  57. echo "############################## INTERACTIVE [without] ##############################"
  58. CLAUDE_EXTRA_ARGS="--model ${MODEL:-sonnet} --effort ${EFFORT:-high} --strict-mcp-config --mcp-config $OUT/mcp-empty.json" \
  59. bash "$HARNESS/itrun.sh" "$REPO" "int-without" "$Q" 2>&1 || echo "[itrun WITHOUT failed]"
  60. echo
  61. fi
  62. echo "############################## RUN-ALL COMPLETE ##############################"