run-arms.sh 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. #!/usr/bin/env bash
  2. # Tool-surface ablation — run ONE repo+question under ONE arm.
  3. #
  4. # Arms vary (exposed codegraph tools, trace-first steering). Tools are trimmed
  5. # SERVER-SIDE via CODEGRAPH_MCP_TOOLS in the MCP config's `env` block, so an
  6. # ablated tool is genuinely absent from ListTools — no deferred-ToolSearch or
  7. # denied-call confound (which --disallowedTools would introduce). Steering is
  8. # injected with --append-system-prompt, so no rebuild of the shipped
  9. # server-instructions is needed to A/B it.
  10. #
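#   arm  name           exposed tools          steering
#   A    control        all tools              no steering
#   B    steer          all tools              trace-first
#   C    no-explore     hide explore           trace-first
#   D    trace-centric  hide explore+context   trace-first
#   E    control-probe  hide explore+context   trace-first (caller passes a NON-flow Q)
#   F    (as B)         all tools              trace-first (run on the body-inlining trace build)
#   G    (as A)         all tools              no steering
#   H    body-trace     all tools              no steering
#   I    body-trace + destination callees (sufficiency)
#                       all tools              no steering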
  16. #
# Usage: run-arms.sh <repo-path> "<question>" <A|B|C|D|E|F|G|H|I> [run-id]
  18. set -uo pipefail
  19. REPO="${1:?repo path}"; Q="${2:?question}"; ARM="${3:?arm A-E}"; RID="${4:-1}"
  20. CG_BIN="${CG_BIN:-$(command -v codegraph)}"
  21. OUT="${ARMS_OUT:-/tmp/arms}/$(basename "$REPO")"
  22. mkdir -p "$OUT"
  23. [ -n "$CG_BIN" ] || { echo "no codegraph binary (set CG_BIN)"; exit 1; }
  24. [ -d "$REPO/.codegraph" ] || { echo "no .codegraph index at $REPO"; exit 1; }
  25. STEER='Flow questions ("how does X reach/become Y", "trace the flow", request to handler, state to render): call codegraph_trace(from,to) FIRST — one call returns the whole path. Use codegraph_context/search only to locate the two endpoint symbols if you do not know them. Do NOT reconstruct the path with repeated search/callers/explore.'
  26. KEEP_NO_EXPLORE="trace,search,node,context,callers,callees,impact,files,status"
  27. KEEP_TRACE_CENTRIC="trace,search,node,callers,callees,impact,files,status"
  28. case "$ARM" in
  29. A|G|H|I) TOOLS=""; STEERING="" ;; # no steering; H = body-trace, I = body-trace + destination callees (sufficiency)
  30. B|F) TOOLS=""; STEERING="$STEER" ;; # F = B's surface, run on the body-inlining trace build
  31. C) TOOLS="$KEEP_NO_EXPLORE"; STEERING="$STEER" ;;
  32. D|E) TOOLS="$KEEP_TRACE_CENTRIC"; STEERING="$STEER" ;;
  33. *) echo "bad arm '$ARM' (want A|B|C|D|E)"; exit 1 ;;
  34. esac
  35. CFG="$OUT/mcp-$ARM.json"
  36. if [ -n "$TOOLS" ]; then
  37. cat > "$CFG" <<JSON
  38. {"mcpServers":{"codegraph":{"command":"$CG_BIN","args":["serve","--mcp","--path","$REPO"],"env":{"CODEGRAPH_MCP_TOOLS":"$TOOLS"}}}}
  39. JSON
  40. else
  41. cat > "$CFG" <<JSON
  42. {"mcpServers":{"codegraph":{"command":"$CG_BIN","args":["serve","--mcp","--path","$REPO"]}}}
  43. JSON
  44. fi
  45. LOG="$OUT/$ARM-r$RID.jsonl"; ERR="$OUT/$ARM-r$RID.err"
  46. ARGS=( -p "$Q" --output-format stream-json --verbose
  47. --permission-mode bypassPermissions --model opus --max-budget-usd 4
  48. --strict-mcp-config --mcp-config "$CFG" )
  49. [ -n "$STEERING" ] && ARGS+=( --append-system-prompt "$STEERING" )
  50. ( cd "$REPO" && claude "${ARGS[@]}" > "$LOG" 2>"$ERR" )
  51. echo "[$(basename "$REPO") $ARM r$RID] exit $? -> $LOG ($(wc -l < "$LOG" | tr -d ' ') lines)"