offload-eval-3arm.sh 4.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. #!/usr/bin/env bash
  2. # 3-arm offload eval for ONE indexed repo + ONE question, n reps each.
  3. # ARM offload : codegraph attached, managed offload ON (per-run AI usage log)
  4. # ARM raw : codegraph attached, CODEGRAPH_OFFLOAD_DISABLE=1 (raw source)
  5. # ARM nocg : no codegraph (empty MCP config) -> Read/Grep baseline
  6. # All arms: claude -p sonnet --effort high. One JSON metrics line/run -> $RESULTS.
  7. #
  8. # Usage: offload-eval-3arm.sh <indexed-repo> <tier> <reps> "<question>"
  9. # Env: MODEL=sonnet EFFORT=high RESULTS=<file> AGENT_EVAL_OUT=<scratch dir>
  # Strict-ish mode. `-e` is deliberately NOT set: later steps are best-effort
  # (e.g. pkill returns non-zero when there is no daemon to kill), so critical
  # steps are checked explicitly with `|| die` instead.
  set -uo pipefail

  # Print a diagnostic on stderr and abort the whole eval.
  die() { printf '%s\n' "$*" >&2; exit 1; }

  # Locations derived from where this script lives.
  HERE="$(cd "$(dirname "$0")" && pwd)"
  ENGINE="$(cd "$HERE/../.." && pwd)"
  BIN="$ENGINE/dist/bin/codegraph.js"
  EXTRACT="$HERE/offload-eval-metrics.mjs"

  # Positional arguments (all required).
  TARGET="${1:?usage: offload-eval-3arm.sh <indexed-repo> <tier> <reps> \"<question>\"}"
  TIER="${2:?tier}"
  REPS="${3:?reps}"
  Q="${4:?question}"

  # Output locations (overridable through the environment).
  OUT="${AGENT_EVAL_OUT:-/tmp/cg-offload-eval}"
  RUNS="$OUT/runs"
  RESULTS="${RESULTS:-$OUT/results.jsonl}"
  REPO=$(basename "$TARGET")

  # Fail fast, on stderr, before any arm starts: a bad rep count or a missing
  # engine/extractor would otherwise only show up as empty or misleading runs.
  [[ $REPS =~ ^[1-9][0-9]*$ ]] || die "reps must be a positive integer, got: $REPS"
  mkdir -p "$RUNS" || die "cannot create run directory: $RUNS"
  command -v claude >/dev/null || die "no claude on PATH"
  command -v node >/dev/null || die "no node on PATH"
  [[ -f $BIN ]] || die "codegraph engine not found: $BIN"
  [[ -f $EXTRACT ]] || die "metrics extractor not found: $EXTRACT"
  [[ -d $TARGET/.codegraph ]] || die "not indexed: $TARGET (run offload-eval-setup.sh first)"

  # Physical path so pkill matches the daemon's real cmdline (macOS /tmp->/private/tmp symlink
  # otherwise makes the kill miss the daemon, and the next arm connects to the SURVIVING daemon
  # — contaminating the raw arm with offload).
  TARGET=$(cd "$TARGET" && pwd -P) || die "cannot resolve physical path of $TARGET"
  #######################################
  # Restart the codegraph daemon for one repo and wait for its socket.
  # Globals:   BIN (read) - path to codegraph.js
  # Arguments: $1   - repo path (should be the physical path, as used on the
  #                   daemon's command line)
  #            $2.. - optional extra environment for the daemon, ONE
  #                   "NAME=value" per argument (values may contain spaces);
  #                   empty arguments are ignored
  # Outputs:   " daemon warm" or " WARN daemon never bound" on stdout
  # Returns:   0 once the socket exists, 1 if it never appeared (~15 s)
  #######################################
  prewarm() { # path [extra-env ...] (e.g. "FOO=bar")
    local path=$1
    shift

    # Collect the non-empty extra assignments; each stays a single word so a
    # value with spaces (e.g. a usage-log path) is not split.
    local -a extra_env=()
    local kv
    for kv in "$@"; do
      if [[ -n $kv ]]; then
        extra_env+=("$kv")
      fi
    done

    # pkill -f takes an extended regex: escape metacharacters in the path so a
    # repo such as "c++-lib" still matches, and anchor the end so a sibling
    # path sharing this prefix is left alone.
    local path_re
    path_re=$(printf '%s' "$path" | sed 's/[][\.^$*+?(){}|]/\\&/g')
    pkill -9 -f "serve --mcp --path ${path_re}( |\$)" 2>/dev/null
    rm -f "$path/.codegraph/daemon.sock" 2>/dev/null
    sleep 0.6

    # Launch the daemon detached from our stdio, with a 30-minute idle timeout.
    # The ${arr[@]+...} form keeps `set -u` happy on bash 3.2 when no extra
    # environment was given.
    env ${extra_env[@]+"${extra_env[@]}"} CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 \
      node "$BIN" serve --mcp --path "$path" </dev/null >/dev/null 2>&1 &

    # Poll every 100 ms for the daemon socket; give up after about 15 s.
    if node -e 'const fs=require("fs");let n=0;const t=setInterval(()=>{if(fs.existsSync(process.argv[1]+"/.codegraph/daemon.sock")){clearInterval(t);process.exit(0)}if(n++>150){clearInterval(t);process.exit(1)}},100)' "$path"; then
      echo " daemon warm"
    else
      echo " WARN daemon never bound"
      return 1
    fi
  }
  #######################################
  # Execute one claude run for an arm, append its metrics line to $RESULTS
  # and print a one-line human summary of that same line.
  # Globals:   REPO, RUNS, TARGET, Q, TIER, EXTRACT, RESULTS (read);
  #            MODEL, EFFORT (read, optional: default sonnet / high)
  # Arguments: $1 - arm name
  #            $2 - repetition number
  #            $3 - MCP config file handed to claude
  #            $4 - usage-log path to reset before the run, or "-" for none
  # Outputs:   summary line on stdout; warnings on stderr
  # Returns:   1 when the extractor produced no metrics line, otherwise the
  #            status of the summary printer
  #######################################
  run() { # arm rep mcp-config usage-log-or-dash
    local arm=$1 rep=$2 cfg=$3 usage=$4
    local tag="$REPO-$arm-$rep"
    local stream="$RUNS/$tag.jsonl" errlog="$RUNS/$tag.err"
    local line status=0

    # Start every run with an empty usage log so the metrics cover this run only.
    if [[ $usage != "-" ]]; then
      : > "$usage"
    fi

    # Run claude from inside the repo; the subshell keeps our cwd unchanged.
    # A failed run is still passed to the extractor, but the failure is surfaced.
    ( cd "$TARGET" && claude -p "$Q" \
        --output-format stream-json --verbose --permission-mode bypassPermissions \
        --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 \
        --strict-mcp-config --mcp-config "$cfg" \
        </dev/null > "$stream" 2> "$errlog" ) \
      || echo " WARN claude exited non-zero for $tag (stderr in $errlog)" >&2

    # Capture the metrics line instead of re-reading $RESULTS afterwards:
    # reading the file back would summarise a stale line when the extractor
    # emits nothing, or when another process appends to the same file.
    line=$(node "$EXTRACT" --run "$stream" --usage "$usage" --arm "$arm" --rep "$rep" \
      --repo "$REPO" --tier "$TIER" --q "$Q") || status=$?
    if [[ -z $line ]]; then
      echo " WARN no metrics produced for $tag (extractor exit $status)" >&2
      return 1
    fi
    if (( status != 0 )); then
      echo " WARN extractor exited $status for $tag" >&2
    fi
    printf '%s\n' "$line" >> "$RESULTS"

    # Summarise the line just recorded (last line, if the extractor printed several).
    node -e 'const o=JSON.parse(process.argv[1].trim().split("\n").pop());console.log(` [${o.arm} #${o.rep}] ${o.durationSec}s | main $${o.costUsdMain} ${o.tokBillable} tok | read=${o.read} grep=${o.grep} explore=${o.explore} offload=${o.offloadFired} | AI ${o.ai.calls}call/${o.ai.totalTokens}tok/$${o.ai.costUsd.toFixed(4)} | ok=${o.ok}`)' "$line"
  }
  # Per-arm MCP configs and the offload arm's AI usage log.
  CFG_OFF="$RUNS/mcp-offload-$REPO.json"
  CFG_RAW="$RUNS/mcp-raw-$REPO.json"
  CFG_NOCG="$RUNS/mcp-nocg.json"
  USAGE="$RUNS/$REPO-usage.jsonl"

  # NOTE: paths are substituted into the JSON verbatim, so $USAGE, $BIN and
  # $TARGET must not contain double quotes or backslashes.
  printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","CODEGRAPH_OFFLOAD_USAGE_LOG=%s","node","%s","serve","--mcp","--path","%s"]}}}' "$USAGE" "$BIN" "$TARGET" > "$CFG_OFF"
  printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","CODEGRAPH_OFFLOAD_DISABLE=1","node","%s","serve","--mcp","--path","%s"]}}}' "$BIN" "$TARGET" > "$CFG_RAW"
  printf '{"mcpServers":{}}' > "$CFG_NOCG"

  # Kill the daemon serving $TARGET between arms so the next arm cannot attach
  # to a survivor started with the previous arm's environment.
  stop_daemon() {
    # pkill -f takes an extended regex: escape metacharacters in the path and
    # anchor its end so only this repo's daemon is matched.
    local target_re
    target_re=$(printf '%s' "$TARGET" | sed 's/[][\.^$*+?(){}|]/\\&/g')
    pkill -9 -f "serve --mcp --path ${target_re}( |\$)" 2>/dev/null
    rm -f "$TARGET/.codegraph/daemon.sock" 2>/dev/null
    sleep 1
  }

  echo "###### repo=$REPO tier=$TIER reps=$REPS model=${MODEL:-sonnet}/${EFFORT:-high}"
  printf '%s\n' "###### Q=$Q"

  # Arithmetic loops instead of `seq 1 "$REPS"`: BSD/macOS seq counts DOWN when
  # the last value is below the first, so REPS=0 would run reps 1 and 0.
  echo "== ARM offload =="
  prewarm "$TARGET" "CODEGRAPH_OFFLOAD_USAGE_LOG=$USAGE"
  for ((r = 1; r <= REPS; r++)); do
    run offload "$r" "$CFG_OFF" "$USAGE"
  done
  stop_daemon

  echo "== ARM raw =="
  prewarm "$TARGET" "CODEGRAPH_OFFLOAD_DISABLE=1"
  for ((r = 1; r <= REPS; r++)); do
    run raw "$r" "$CFG_RAW" "-"
  done
  stop_daemon

  # Baseline arm: empty MCP config, so no daemon is started.
  echo "== ARM nocg =="
  for ((r = 1; r <= REPS; r++)); do
    run nocg "$r" "$CFG_NOCG" "-"
  done
  echo "###### DONE $REPO"