mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-20 11:19:56 +08:00
feat: parallelize CI evals — 12 runners (1 per suite) for ~3min wall clock
Switch the eval workflow to use a Docker container image with a pre-baked toolchain. Each of the 12 matrix runners pulls the image, hardlinks the cached node_modules, builds browse, and runs one test suite. Setup drops from ~70s to ~19s per runner. Wall clock is dominated by the slowest individual test, not the sequential sum.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
34
.github/workflows/evals.yml
vendored
34
.github/workflows/evals.yml
vendored
@@ -10,6 +10,11 @@ concurrency:
|
|||||||
jobs:
|
jobs:
|
||||||
evals:
|
evals:
|
||||||
runs-on: ubicloud-standard-2
|
runs-on: ubicloud-standard-2
|
||||||
|
container:
|
||||||
|
image: ghcr.io/${{ github.repository }}/ci:latest
|
||||||
|
credentials:
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
timeout-minutes: 20
|
timeout-minutes: 20
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
@@ -44,22 +49,18 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- uses: oven-sh/setup-bun@v2
|
# Restore pre-installed node_modules from Docker image (~1s vs ~15s install)
|
||||||
|
# If lockfile changed since image was built, fall back to fresh install
|
||||||
- name: Cache bun dependencies
|
- name: Restore deps
|
||||||
uses: actions/cache@v4
|
run: |
|
||||||
with:
|
if diff -q /opt/node_modules_cache/.package-lock.json package.json >/dev/null 2>&1; then
|
||||||
path: ~/.bun/install/cache
|
cp -al /opt/node_modules_cache node_modules
|
||||||
key: bun-${{ hashFiles('bun.lockb') }}
|
else
|
||||||
restore-keys: bun-
|
bun install
|
||||||
|
fi
|
||||||
- run: bun install
|
|
||||||
|
|
||||||
- run: bun run build
|
- run: bun run build
|
||||||
|
|
||||||
- name: Install Claude CLI
|
|
||||||
run: npm i -g @anthropic-ai/claude-code
|
|
||||||
|
|
||||||
- name: Run ${{ matrix.suite.name }}
|
- name: Run ${{ matrix.suite.name }}
|
||||||
env:
|
env:
|
||||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
@@ -78,6 +79,11 @@ jobs:
|
|||||||
|
|
||||||
report:
|
report:
|
||||||
runs-on: ubicloud-standard-2
|
runs-on: ubicloud-standard-2
|
||||||
|
container:
|
||||||
|
image: ghcr.io/${{ github.repository }}/ci:latest
|
||||||
|
credentials:
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
needs: evals
|
needs: evals
|
||||||
if: always() && github.event_name == 'pull_request'
|
if: always() && github.event_name == 'pull_request'
|
||||||
timeout-minutes: 5
|
timeout-minutes: 5
|
||||||
@@ -129,7 +135,7 @@ jobs:
|
|||||||
$(echo -e "$SUITE_LINES")
|
$(echo -e "$SUITE_LINES")
|
||||||
|
|
||||||
---
|
---
|
||||||
*12x ubicloud-standard-2 ($0.0008/min each) | Wall clock ≈ slowest suite*"
|
*12x ubicloud-standard-2 (Docker: pre-baked toolchain + deps) | wall clock ≈ slowest suite*"
|
||||||
|
|
||||||
if [ "$FAILED" -gt 0 ]; then
|
if [ "$FAILED" -gt 0 ]; then
|
||||||
FAILURES=""
|
FAILURES=""
|
||||||
|
|||||||
Reference in New Issue
Block a user