Skip to content

feat(ci): route @claude tasks via Haiku, provision env on demand #1211

feat(ci): route @claude tasks via Haiku, provision env on demand

feat(ci): route @claude tasks via Haiku, provision env on demand #1211

Workflow file for this run

name: Claude

# Events that can start a run. Each job's `if:` narrows these further.
on:
  issue_comment:
    types:
      - created
  issues:
    types:
      - opened
      - assigned
  pull_request_review_comment:
    types:
      - created
  pull_request:
    types:
      - opened
      - synchronize
      - ready_for_review

# Model identifiers are declared once here and read by the jobs via `env.*`,
# so changing either model is a one-line edit.
env:
  CLAUDE_MODEL: claude-opus-4-8
  CLAUDE_ROUTER_MODEL: claude-haiku-4-5

jobs:
# Comment-driven job: responds to `@claude <anything>` in an issue or a comment.
# A Haiku router classifies the request (review asks included) and the job then
# provisions only the environment that request needs (browser / dev server are
# installed on demand) before running the matching prompt. The separate review
# job covers only the automatic on-PR-open/sync review, where there is no
# comment to route.
implement:
  name: Claude (implement)
  # Gate: the triggering text must mention @claude AND its author must be a
  # repository owner, organization member, or collaborator. Bots are not listed:
  # `author_association` has no "BOT" value, so such an entry can never match
  # (and the Claude step in this job sets `allowed_bots: ''` anyway).
  if: >-
    ((github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment')
    && contains(github.event.comment.body || '', '@claude')
    && contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association))
    ||
    (github.event_name == 'issues'
    && (contains(github.event.issue.body || '', '@claude') || contains(github.event.issue.title || '', '@claude'))
    && contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.issue.author_association))
  runs-on: ubuntu-latest
  # Job-wide environment, visible to every step below.
  env:
    GITHUB_TOKEN: ${{ secrets.PAT }}
    VERCEL_GIT_COMMIT_REF: claude/${{ github.ref_name }}
    CHROME_DEVTOOLS_MCP_NO_USAGE_STATISTICS: '1'
    DATABASE_READONLY_URL: ${{ secrets.DATABASE_READONLY_URL }}
    DATABASE_DRIVER: neon
    DATABASE_SSL: 'true'
  permissions:
    contents: write
    pull-requests: write
    issues: write
    actions: read
    id-token: write
  # Serialize @claude runs on the same issue/PR so back-to-back comments
  # (including reviews) don't clobber each other. The group key is the PR number
  # when the event carries one, otherwise the issue number; the review job uses
  # the same expression so both jobs share one queue per PR.
  concurrency:
    group: claude-${{ github.event.pull_request.number || github.event.issue.number }}
    cancel-in-progress: false
  steps:
# Decide the run shape from the request text. Explicit keywords win
# deterministically; otherwise Haiku classifies. Any failure falls back to the
# lean path (no browser / dev server), which the agent can still self-provision.
#
# Outputs (written to $GITHUB_OUTPUT):
#   browser    - playwright | chrome | none
#   dev_server - true | false
#   profile    - ui | code | docs | question | review
- name: Route task
  id: route
  env:
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    ROUTER_MODEL: ${{ env.CLAUDE_ROUTER_MODEL }}
    # User-controlled text is passed through env vars, never interpolated into
    # the script body, so it cannot inject shell.
    BODY: ${{ github.event.comment.body || github.event.issue.body || '' }}
    TITLE: ${{ github.event.issue.title || '' }}
  run: |
    set -euo pipefail
    TEXT="$(printf '%s\n%s' "${TITLE:-}" "${BODY:-}")"
    # emit <browser> <dev_server> <profile> <source>: publish the decision as
    # step outputs and log where it came from.
    emit() {
      {
        echo "browser=$1"
        echo "dev_server=$2"
        echo "profile=$3"
      } >> "$GITHUB_OUTPUT"
      echo "Routed: profile=$3 browser=$1 dev_server=$2 (source: $4)"
    }
    # mentions <regex>: case-insensitive search of the request text.
    # The text is fed through a here-string instead of `printf ... | grep -q`:
    # with `pipefail`, grep -q exiting on its first match can SIGPIPE the writer
    # on a large body, making the pipeline report failure despite a match.
    mentions() {
      grep -qiE "$1" <<<"$TEXT"
    }
    # 1. Explicit overrides — no API call.
    if mentions '@claude review'; then
      emit none false review "explicit @claude review"; exit 0
    fi
    if mentions '@claude chrome'; then
      emit chrome true ui "explicit @claude chrome"; exit 0
    fi
    if mentions '@claude frontend'; then
      emit playwright true ui "explicit @claude frontend"; exit 0
    fi
    if mentions '@claude (general|lite)'; then
      emit none false code "explicit @claude general"; exit 0
    fi
    # 2. Classify with Haiku. Lean fallback on any problem.
    if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
      emit none false code "no ANTHROPIC_API_KEY"; exit 0
    fi
    SYSTEM='You are a routing classifier for an automated engineering bot on InferenceX, a Next.js dashboard for ML inference benchmarks. Given a GitHub issue/comment, decide the execution environment. Respond with ONLY a compact JSON object and nothing else: {"profile":"ui|code|docs|question|review","browser":"playwright|chrome|none"}. Guidance: A request to review, critique, or give feedback on a pull request or its diff (e.g. "review this", "take a look at the PR", "any issues with this change?") -> profile=review, browser=none. UI / chart / D3 / React component / styling / .tsx / visual work -> profile=ui, browser=playwright (use browser=chrome ONLY if the user explicitly asks for Chrome DevTools / network / console debugging). Backend / DB / ETL / ingest / scripts / lib / API-route logic -> profile=code, browser=none. Docs / markdown / blog content / config / CI workflow files -> profile=docs, browser=none. A pure question or code explanation needing no file change -> profile=question, browser=none. When unsure between ui and code, choose code.'
    # jq builds the request so the user text is JSON-escaped correctly.
    REQ="$(jq -n --arg m "${ROUTER_MODEL:-claude-haiku-4-5}" --arg s "$SYSTEM" --arg u "$TEXT" \
      '{model:$m, max_tokens:300, system:$s, messages:[{role:"user", content:$u}]}' 2>/dev/null || true)"
    if [ -z "$REQ" ]; then
      emit none false code "request build failed"; exit 0
    fi
    # Every stage below is `|| true`: a network or parse failure leaves the
    # variables empty, and the `case` defaults then select the lean path.
    RESP="$(curl -sS --max-time 60 https://api.anthropic.com/v1/messages \
      -H "x-api-key: ${ANTHROPIC_API_KEY}" \
      -H "anthropic-version: 2023-06-01" \
      -H "content-type: application/json" \
      -d "$REQ" 2>/dev/null || true)"
    OUT="$(printf '%s' "$RESP" | jq -r '.content[0].text // empty' 2>/dev/null || true)"
    # Strip Markdown code fences in case the model wrapped its JSON in them.
    CLEAN="$(printf '%s' "$OUT" | sed 's/```json//g; s/```//g')"
    PROFILE="$(printf '%s' "$CLEAN" | jq -r '.profile // empty' 2>/dev/null || true)"
    BROWSER="$(printf '%s' "$CLEAN" | jq -r '.browser // empty' 2>/dev/null || true)"
    # Whitelist the classifier's answers; anything unexpected becomes the default.
    case "$PROFILE" in ui|code|docs|question|review) ;; *) PROFILE=code ;; esac
    case "$BROWSER" in playwright|chrome|none) ;; *) BROWSER=none ;; esac
    # Keep browser coherent with profile so the provisioned setup matches the
    # prompt's definition-of-done. A ui task must get a browser (default
    # Playwright if the classifier omitted it or returned none); every non-ui
    # profile gets none, so it never pre-provisions a browser/dev server it
    # won't use.
    case "$PROFILE" in
      ui) [ "$BROWSER" = "none" ] && BROWSER=playwright || true ;;
      *) BROWSER=none ;;
    esac
    # Dev server is provisioned iff a browser is — you can't verify in a browser
    # without the app running, and non-browser tasks don't need it.
    if [ "$BROWSER" = "none" ]; then DEVSERVER=false; else DEVSERVER=true; fi
    emit "$BROWSER" "$DEVSERVER" "$PROFILE" "haiku"
# Full-history checkout (fetch-depth 0), authenticated with the PAT secret.
- name: Checkout repository
  uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
  with:
    token: ${{ secrets.PAT }}
    fetch-depth: 0

# pnpm is set up before Node so setup-node's `cache: pnpm` option can be used.
- name: Setup pnpm
  uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6.0.8

- name: Setup Node.js
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
  with:
    # Quoted so the version stays a string rather than a YAML integer.
    node-version: "24"
    cache: pnpm
# The browser/binary caches are restored on every run, regardless of the router
# decision (restoring is close to free). Any on-demand `playwright install` /
# `cypress install` further down then becomes a fast cache hit.
# Both keys are derived from the runner OS and the pnpm lockfile hash, with an
# OS-only prefix as the fallback restore key.
- name: Restore Playwright cache
  id: playwright-cache
  uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
  with:
    key: playwright-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }}
    restore-keys: |
      playwright-${{ runner.os }}-
    path: ~/.cache/ms-playwright

- name: Restore Cypress cache
  id: cypress-cache
  uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
  with:
    key: cypress-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }}
    restore-keys: |
      cypress-${{ runner.os }}-
    path: ~/.cache/Cypress
# Workspace dependencies are installed for every profile except `review`.
- name: Install dependencies
  if: ${{ steps.route.outputs.profile != 'review' }}
  run: pnpm install --frozen-lockfile

# --- On-demand setup, gated on the router decision ---
# Chromium (plus its system dependencies) only when the router chose Playwright.
- name: Install Playwright browser
  if: ${{ steps.route.outputs.browser == 'playwright' }}
  run: npx -y playwright install --with-deps chromium
# Runs only when the router chose the Chrome path: print the version of the
# first Chrome binary found on PATH, or fail the step if neither name exists.
- name: Verify Chrome is available
  if: ${{ steps.route.outputs.browser == 'chrome' }}
  run: |
    set -euo pipefail
    # Probe the two binary names in order of preference.
    for chrome_bin in google-chrome google-chrome-stable; do
      if command -v "$chrome_bin" >/dev/null 2>&1; then
        "$chrome_bin" --version
        exit 0
      fi
    done
    echo "ERROR: Chrome not found on runner."
    echo "Install Chrome stable or set --executable-path for chrome-devtools-mcp."
    exit 1
# Fetch the Cypress binary only for ui runs, and only when the cache restore
# above was not an exact-key hit.
- name: Install Cypress binary
  if: ${{ steps.route.outputs.profile == 'ui' && steps.cypress-cache.outputs.cache-hit != 'true' }}
  run: pnpm --filter @semianalysisai/inferencex-app exec cypress install
# Best-effort background dev server. The step never fails the job: it is
# `continue-on-error` and always exits 0, reporting the result via outputs.
#
# Outputs: log (path of the server log), pid (background process id),
#          up (true once http://localhost:3000 answers, false otherwise).
- name: Start dev server
  id: devserver
  if: ${{ steps.route.outputs.dev_server == 'true' }}
  continue-on-error: true
  run: |
    set -euo pipefail
    log_file=/tmp/next-dev.log
    echo "log=$log_file" >> "$GITHUB_OUTPUT"

    # Launch in the background and remember the pid for liveness checks.
    pnpm run dev > "$log_file" 2>&1 &
    server_pid=$!
    echo "pid=$server_pid" >> "$GITHUB_OUTPUT"

    # Poll up to 60 times, 2 seconds apart.
    attempt=0
    while [ "$attempt" -lt 60 ]; do
      if curl -sSf http://localhost:3000 >/dev/null; then
        echo "Dev server is up"
        echo "up=true" >> "$GITHUB_OUTPUT"
        exit 0
      fi
      # Stop waiting as soon as the server process itself has died.
      if ! kill -0 "$server_pid" 2>/dev/null; then
        echo "Dev server process exited early"
        break
      fi
      sleep 2
      attempt=$((attempt + 1))
    done

    # Not reachable: record that, show the log tail, and clean up the process.
    echo "Dev server failed to start (best effort; continuing)."
    echo "up=false" >> "$GITHUB_OUTPUT"
    tail -n 200 "$log_file" || true
    kill "$server_pid" 2>/dev/null || true
    exit 0
# Build the agent prompt + MCP/tool set from the router decision. One adaptive
# prompt: a dynamic routing header selects which profile rules apply; the
# browser MCP wired matches the decision (Playwright by default, Chrome on
# request).
#
# Writes three variables to $GITHUB_ENV for the Claude step: MCP_CONFIG,
# CLAUDE_TOOLS, and the multiline PROMPT_BODY.
- name: Compose prompt + tools
  id: compose
  env:
    BROWSER: ${{ steps.route.outputs.browser }}
    PROFILE: ${{ steps.route.outputs.profile }}
    DEV_SERVER_UP: ${{ steps.devserver.outputs.up }}
    DEV_SERVER_LOG: ${{ steps.devserver.outputs.log }}
    REPO: ${{ github.repository }}
    PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
  run: |
    set -euo pipefail
    # Defaults cover the case where an upstream step produced no output
    # (e.g. the dev-server step was skipped).
    BROWSER="${BROWSER:-none}"
    PROFILE="${PROFILE:-code}"
    DEV="${DEV_SERVER_UP:-not started}"
    LOG="${DEV_SERVER_LOG:-/tmp/next-dev.log}"
    # cat_prompt <file>: print the file, then add a newline only if the file
    # does not already end with one. Without this, a prompt file lacking a
    # trailing newline would glue the heredoc terminator onto its last line and
    # the multiline $GITHUB_ENV value would never be closed.
    cat_prompt() {
      cat "$1"
      [ -z "$(tail -c1 "$1")" ] || echo
    }
    # Review profile: dedicated review prompt + narrow gh-only tools, no browser
    # MCP. Shares .github/claude/review-prompt.md with the auto-on-PR review job.
    if [ "$PROFILE" = "review" ]; then
      {
        echo 'MCP_CONFIG={"mcpServers":{}}'
        echo 'CLAUDE_TOOLS=mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)'
      } >> "$GITHUB_ENV"
      {
        echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__'
        printf 'REPO: %s\nPR NUMBER: %s\n\n' "${REPO:-}" "${PR_NUMBER:-}"
        cat_prompt "${GITHUB_WORKSPACE}/.github/claude/review-prompt.md"
        echo '__CLAUDE_PROMPT_EOF__'
      } >> "$GITHUB_ENV"
      exit 0
    fi
    # Chrome is the only non-default MCP; everything else gets Playwright (wired
    # even for browser=none so the agent can install a browser on demand and
    # drive it).
    if [ "$BROWSER" = "chrome" ]; then
      WIRED=chrome
      MCP_CONFIG='{"mcpServers":{"fetch":{"command":"npx","args":["-y","@anthropic-ai/mcp-server-fetch@latest"]},"chrome":{"command":"npx","args":["-y","chrome-devtools-mcp@latest","--headless=true","--isolated=true"]}}}'
      CLAUDE_TOOLS='Write,Edit,Read,Glob,Grep,WebFetch,mcp__github__*,mcp__github_inline_comment__create_inline_comment,mcp__github_ci__*,mcp__fetch__*,mcp__chrome__*,Bash'
    else
      WIRED=playwright
      MCP_CONFIG='{"mcpServers":{"fetch":{"command":"npx","args":["-y","@anthropic-ai/mcp-server-fetch@latest"]},"playwright":{"command":"npx","args":["-y","@playwright/mcp@latest","--headless","--caps=vision"]}}}'
      CLAUDE_TOOLS='Write,Edit,Read,Glob,Grep,WebFetch,mcp__github__*,mcp__github_inline_comment__create_inline_comment,mcp__github_ci__*,mcp__fetch__*,mcp__playwright__*,Bash'
    fi
    {
      echo "MCP_CONFIG=$MCP_CONFIG"
      echo "CLAUDE_TOOLS=$CLAUDE_TOOLS"
    } >> "$GITHUB_ENV"
    # Body lives in .github/claude/implement-prompt.md (already checked out) so
    # this YAML stays free of embedded prose; only the dynamic routing header is
    # built here.
    {
      echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__'
      printf '## Routing (decided by the task router for this run)\n- profile: %s\n- browser MCP wired: %s (use mcp__%s__* tools when browser verification is needed)\n- dev server: %s (log: %s)\n\nApply the "%s" rules under "Task profiles" below.\n\n' \
        "$PROFILE" "$WIRED" "$WIRED" "$DEV" "$LOG" "$PROFILE"
      cat_prompt "${GITHUB_WORKSPACE}/.github/claude/implement-prompt.md"
      echo '__CLAUDE_PROMPT_EOF__'
    } >> "$GITHUB_ENV"
# Deliberately no `if: always()`: when setup genuinely failed, this step is
# skipped instead of launching the agent with an empty prompt/config. A dev
# server that did not come up is not such a failure, because that step is
# best-effort (`continue-on-error`).
- name: Run Claude Code
  id: claude
  uses: anthropics/claude-code-action@d5726de019ec4498aa667642bc3a80fca83aa102 # v1.0.148
  env:
    # Both token variables carry the PAT.
    GH_TOKEN: ${{ secrets.PAT }}
    GITHUB_TOKEN: ${{ secrets.PAT }}
    # Timeouts are in milliseconds (variable names end in _MS); kept as strings.
    BASH_DEFAULT_TIMEOUT_MS: "1800000"
    BASH_MAX_TIMEOUT_MS: "3600000"
    # Dev-server state from the best-effort step; empty when it was skipped.
    DEV_SERVER_UP: ${{ steps.devserver.outputs.up }}
    DEV_SERVER_PID: ${{ steps.devserver.outputs.pid }}
    DEV_SERVER_LOG: ${{ steps.devserver.outputs.log }}
  with:
    anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
    # NOTE(review): this input uses the default GITHUB_TOKEN while the env vars
    # above use the PAT — presumably intentional; confirm.
    github_token: ${{ secrets.GITHUB_TOKEN }}
    trigger_phrase: "@claude"
    track_progress: true
    allowed_bots: ""
    additional_permissions: |
      actions: read
    # Model, MCP config and tool allow-list: the model comes from the
    # workflow-level env, the other two from the compose step via $GITHUB_ENV.
    claude_args: |
      --model "${{ env.CLAUDE_MODEL }}"
      --mcp-config '${{ env.MCP_CONFIG }}'
      --allowedTools "${{ env.CLAUDE_TOOLS }}"
    prompt: ${{ env.PROMPT_BODY }}
# Automatic review on PR open/sync/ready. Comment-triggered reviews
# (`@claude review`, or any review-phrased ask) are classified by the router in
# the implement job and share this job's prompt
# (.github/claude/review-prompt.md). Review-only: narrow tools.
review:
  name: Claude (review)
  # Gate: non-draft pull requests whose author is a repository owner,
  # organization member, or collaborator. Bots are not listed:
  # `author_association` has no "BOT" value, so such an entry can never match.
  if: >-
    github.event_name == 'pull_request'
    && !github.event.pull_request.draft
    && contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association)
  runs-on: ubuntu-latest
  # Read-only on contents: this job comments on the PR but never pushes.
  permissions:
    contents: read
    pull-requests: write
    actions: read
    id-token: write
  # Same group expression as the implement job so a comment-triggered review and
  # an auto-review on the same PR serialize instead of overlapping with
  # duplicate feedback.
  concurrency:
    group: claude-${{ github.event.pull_request.number || github.event.issue.number }}
    cancel-in-progress: false
  steps:
# Full-history checkout; no `token` input is passed here, unlike the implement
# job's checkout.
- name: Checkout repository
  uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
  with:
    fetch-depth: 0
# Same review prompt the router's `review` profile uses — single source of
# truth. Writes the multiline PROMPT_BODY variable to $GITHUB_ENV: a two-line
# REPO / PR NUMBER header followed by the contents of the prompt file.
- name: Compose review prompt
  env:
    REPO: ${{ github.repository }}
    PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
  run: |
    set -euo pipefail
    PROMPT_FILE="${GITHUB_WORKSPACE}/.github/claude/review-prompt.md"
    {
      echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__'
      printf 'REPO: %s\nPR NUMBER: %s\n\n' "${REPO:-}" "${PR_NUMBER:-}"
      cat "$PROMPT_FILE"
      # Add a newline only if the file does not already end with one. Otherwise
      # the terminator below would be glued onto the file's last line and the
      # multiline value would never be closed.
      [ -z "$(tail -c1 "$PROMPT_FILE")" ] || echo
      echo '__CLAUDE_PROMPT_EOF__'
    } >> "$GITHUB_ENV"
# Runs the review agent with the prompt composed by the previous step. The tool
# allow-list is limited to inline review comments plus `gh pr comment`,
# `gh pr diff` and `gh pr view`.
- name: PR Review with Claude
  uses: anthropics/claude-code-action@d5726de019ec4498aa667642bc3a80fca83aa102 # v1.0.148
  with:
    anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
    prompt: ${{ env.PROMPT_BODY }}
    trigger_phrase: "@claude review"
    track_progress: true
    allowed_bots: ""
    # The model comes from the workflow-level env.
    claude_args: |
      --model "${{ env.CLAUDE_MODEL }}"
      --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)"