feat(ci): route @claude tasks via Haiku, provision env on demand #1211
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Claude | |
| # Events that can start this workflow. The comment and issue events are consumed by | |
| # the `implement` job's `if:` gate; `pull_request` events are consumed by the | |
| # automatic `review` job's gate. | |
| on: | |
| issue_comment: | |
| types: [created] | |
| issues: | |
| types: [opened, assigned] | |
| pull_request_review_comment: | |
| types: [created] | |
| pull_request: | |
| types: [opened, synchronize, ready_for_review] | |
| # Single source of truth for the models. Bumping either is a one-line change. | |
| # CLAUDE_MODEL is passed to the agent via --model in both jobs; CLAUDE_ROUTER_MODEL | |
| # is handed to the "Route task" step as ROUTER_MODEL for the classification call. | |
| env: | |
| CLAUDE_MODEL: claude-opus-4-8 | |
| CLAUDE_ROUTER_MODEL: claude-haiku-4-5 | |
| jobs: | |
| # @claude <anything> in an issue/comment. A Haiku router classifies the request — | |
| # including review asks — and provisions only the environment it needs (browser/dev | |
| # server installed on demand), then runs the matching prompt. The review job below | |
| # handles only the automatic on-PR-open/sync review (no comment to route). | |
| implement: | |
| name: Claude (implement) | |
| # Gate: run only for (a) an issue comment or PR review comment whose body contains | |
| # '@claude', or (b) an `issues` event whose title or body contains '@claude' — and in | |
| # both cases only when the author's association is in the allow-list below. | |
| # NOTE(review): 'BOT' does not appear to be one of GitHub's documented | |
| # author_association values — confirm that entry can ever match. | |
| if: >- | |
| ((github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment') | |
| && contains(github.event.comment.body || '', '@claude') | |
| && contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.comment.author_association)) | |
| || | |
| (github.event_name == 'issues' | |
| && (contains(github.event.issue.body || '', '@claude') || contains(github.event.issue.title || '', '@claude')) | |
| && contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.issue.author_association)) | |
| runs-on: ubuntu-latest | |
| # Job-wide environment, visible to every step of this job. GITHUB_TOKEN is set from | |
| # the PAT secret here rather than from the workflow's default token. | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.PAT }} | |
| VERCEL_GIT_COMMIT_REF: claude/${{ github.ref_name }} | |
| CHROME_DEVTOOLS_MCP_NO_USAGE_STATISTICS: '1' | |
| DATABASE_READONLY_URL: ${{ secrets.DATABASE_READONLY_URL }} | |
| DATABASE_DRIVER: neon | |
| DATABASE_SSL: 'true' | |
| # Scopes granted to the workflow's default token for this job. | |
| permissions: | |
| contents: write | |
| pull-requests: write | |
| issues: write | |
| actions: read | |
| id-token: write | |
| # Serialize @claude runs on the same issue/PR so back-to-back comments (incl. reviews) | |
| # don't clobber each other — matches the protection the standalone review job had. | |
| # The group key is the PR number when the event carries one, otherwise the issue number. | |
| concurrency: | |
| group: claude-${{ github.event.pull_request.number || github.event.issue.number }} | |
| cancel-in-progress: false | |
| steps: | |
| # Decide the run shape from the request text. Explicit keywords win deterministically; | |
| # otherwise Haiku classifies. Any failure falls back to the lean path (no browser/dev | |
| # server), which the agent can still self-provision on demand. | |
| # Outputs: browser (playwright|chrome|none), dev_server (true|false), | |
| # profile (ui|code|docs|question|review). | |
| - name: Route task | |
| id: route | |
| env: | |
| ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} | |
| ROUTER_MODEL: ${{ env.CLAUDE_ROUTER_MODEL }} | |
| BODY: ${{ github.event.comment.body || github.event.issue.body || '' }} | |
| TITLE: ${{ github.event.issue.title || '' }} | |
| run: | | |
| set -euo pipefail | |
| # Title and body arrive via env (never interpolated into the script text). | |
| TEXT="$(printf '%s\n%s' "${TITLE:-}" "${BODY:-}")" | |
| # emit <browser> <dev_server> <profile> <source>: publish the decision as step | |
| # outputs and log which rule produced it. | |
| emit() { | |
| { | |
| echo "browser=$1" | |
| echo "dev_server=$2" | |
| echo "profile=$3" | |
| } >> "$GITHUB_OUTPUT" | |
| echo "Routed: profile=$3 browser=$1 dev_server=$2 (source: $4)" | |
| } | |
| # has_keyword [grep-flags] <pattern>: case-insensitive search of $TEXT. | |
| # Feeds grep from a here-string instead of `printf ... | grep -q`: under | |
| # `pipefail`, grep -q exiting at the first match can SIGPIPE a printf that is | |
| # still writing a long body, so the pipeline reports failure and the explicit | |
| # override is silently skipped. | |
| has_keyword() { | |
| grep -qi "$@" <<<"$TEXT" | |
| } | |
| # 1. Explicit overrides — no API call. | |
| if has_keyword '@claude review'; then | |
| emit none false review "explicit @claude review"; exit 0 | |
| fi | |
| if has_keyword '@claude chrome'; then | |
| emit chrome true ui "explicit @claude chrome"; exit 0 | |
| fi | |
| if has_keyword '@claude frontend'; then | |
| emit playwright true ui "explicit @claude frontend"; exit 0 | |
| fi | |
| if has_keyword -E '@claude (general|lite)'; then | |
| emit none false code "explicit @claude general"; exit 0 | |
| fi | |
| # 2. Classify with Haiku. Lean fallback on any problem. | |
| if [ -z "${ANTHROPIC_API_KEY:-}" ]; then | |
| emit none false code "no ANTHROPIC_API_KEY"; exit 0 | |
| fi | |
| SYSTEM='You are a routing classifier for an automated engineering bot on InferenceX, a Next.js dashboard for ML inference benchmarks. Given a GitHub issue/comment, decide the execution environment. Respond with ONLY a compact JSON object and nothing else: {"profile":"ui|code|docs|question|review","browser":"playwright|chrome|none"}. Guidance: A request to review, critique, or give feedback on a pull request or its diff (e.g. "review this", "take a look at the PR", "any issues with this change?") -> profile=review, browser=none. UI / chart / D3 / React component / styling / .tsx / visual work -> profile=ui, browser=playwright (use browser=chrome ONLY if the user explicitly asks for Chrome DevTools / network / console debugging). Backend / DB / ETL / ingest / scripts / lib / API-route logic -> profile=code, browser=none. Docs / markdown / blog content / config / CI workflow files -> profile=docs, browser=none. A pure question or code explanation needing no file change -> profile=question, browser=none. When unsure between ui and code, choose code.' | |
| # Build the request with jq so the untrusted text is JSON-escaped. | |
| REQ="$(jq -n --arg m "${ROUTER_MODEL:-claude-haiku-4-5}" --arg s "$SYSTEM" --arg u "$TEXT" \ | |
| '{model:$m, max_tokens:300, system:$s, messages:[{role:"user", content:$u}]}' 2>/dev/null || true)" | |
| if [ -z "$REQ" ]; then | |
| emit none false code "request build failed"; exit 0 | |
| fi | |
| # Every stage below tolerates failure (`|| true`); an empty or unparsable | |
| # reply simply falls through to the defaults chosen by the case statements. | |
| RESP="$(curl -sS --max-time 60 https://api.anthropic.com/v1/messages \ | |
| -H "x-api-key: ${ANTHROPIC_API_KEY}" \ | |
| -H "anthropic-version: 2023-06-01" \ | |
| -H "content-type: application/json" \ | |
| -d "$REQ" 2>/dev/null || true)" | |
| OUT="$(printf '%s' "$RESP" | jq -r '.content[0].text // empty' 2>/dev/null || true)" | |
| # Strip Markdown code fences in case the model wrapped its JSON in them. | |
| CLEAN="$(printf '%s' "$OUT" | sed 's/```json//g; s/```//g')" | |
| PROFILE="$(printf '%s' "$CLEAN" | jq -r '.profile // empty' 2>/dev/null || true)" | |
| BROWSER="$(printf '%s' "$CLEAN" | jq -r '.browser // empty' 2>/dev/null || true)" | |
| # Whitelist both fields; anything unexpected becomes the lean default. | |
| case "$PROFILE" in ui|code|docs|question|review) ;; *) PROFILE=code ;; esac | |
| case "$BROWSER" in playwright|chrome|none) ;; *) BROWSER=none ;; esac | |
| # Keep browser coherent with profile so the provisioned setup matches the prompt's | |
| # definition-of-done. A ui task must get a browser (default Playwright if the | |
| # classifier omitted it or returned none); every non-ui profile gets none, so it | |
| # never pre-provisions a browser/dev server it won't use. | |
| case "$PROFILE" in | |
| ui) [ "$BROWSER" = "none" ] && BROWSER=playwright || true ;; | |
| *) BROWSER=none ;; | |
| esac | |
| # Dev server is provisioned iff a browser is — you can't verify in a browser | |
| # without the app running, and non-browser tasks don't need it. | |
| if [ "$BROWSER" = "none" ]; then DEVSERVER=false; else DEVSERVER=true; fi | |
| emit "$BROWSER" "$DEVSERVER" "$PROFILE" "haiku" | |
| # Check out with the PAT secret as the token and fetch-depth 0. | |
| - name: Checkout repository | |
| uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 | |
| with: | |
| fetch-depth: 0 | |
| token: ${{ secrets.PAT }} | |
| # No version input is given here — presumably pnpm's version is resolved from the | |
| # repository's package.json; confirm. | |
| - name: Setup pnpm | |
| uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6.0.8 | |
| - name: Setup Node.js | |
| uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 | |
| with: | |
| node-version: '24' | |
| cache: 'pnpm' | |
| # Always restore the browser/binary caches (a restore is a near-noop). This makes | |
| # any on-demand `playwright install` / `cypress install` below a fast cache hit. | |
| # Both caches are keyed on the runner OS plus the hash of pnpm-lock.yaml, with an | |
| # OS-only prefix as the fallback restore key. | |
| - name: Restore Playwright cache | |
| id: playwright-cache | |
| uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 | |
| with: | |
| path: ~/.cache/ms-playwright | |
| key: playwright-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }} | |
| restore-keys: | | |
| playwright-${{ runner.os }}- | |
| - name: Restore Cypress cache | |
| id: cypress-cache | |
| uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 | |
| with: | |
| path: ~/.cache/Cypress | |
| key: cypress-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }} | |
| restore-keys: | | |
| cypress-${{ runner.os }}- | |
| # Skipped for the review profile, whose tool list in the "Compose prompt + tools" | |
| # step is limited to inline comments and `gh pr` commands. | |
| - name: Install dependencies | |
| if: steps.route.outputs.profile != 'review' | |
| run: pnpm install --frozen-lockfile | |
| # --- On-demand setup, gated on the router decision --- | |
| # Runs only when the router chose the Playwright browser. | |
| # NOTE(review): `npx -y playwright` may resolve a Playwright version other than the | |
| # one pinned in the lockfile — confirm this is intended. | |
| - name: Install Playwright browser | |
| if: steps.route.outputs.browser == 'playwright' | |
| run: npx -y playwright install --with-deps chromium | |
| # Runs only when the router chose Chrome. Prints the version of whichever Chrome | |
| # binary is on PATH, or fails the step (exit 1) if neither name is found. | |
| - name: Verify Chrome is available | |
| if: steps.route.outputs.browser == 'chrome' | |
| run: | | |
| set -euo pipefail | |
| if command -v google-chrome >/dev/null 2>&1; then | |
| google-chrome --version | |
| elif command -v google-chrome-stable >/dev/null 2>&1; then | |
| google-chrome-stable --version | |
| else | |
| echo "ERROR: Chrome not found on runner." | |
| echo "Install Chrome stable or set --executable-path for chrome-devtools-mcp." | |
| exit 1 | |
| fi | |
| # Only for the ui profile, and only when the Cypress cache restore did not report an | |
| # exact key hit. | |
| - name: Install Cypress binary | |
| if: steps.route.outputs.profile == 'ui' && steps.cypress-cache.outputs.cache-hit != 'true' | |
| run: pnpm --filter @semianalysisai/inferencex-app exec cypress install | |
| # Best-effort: launches `pnpm run dev` in the background and polls localhost:3000. | |
| # Publishes step outputs `log`, `pid` and `up` (true|false). The script exits 0 on | |
| # both the success and the give-up path, and the step is continue-on-error as well. | |
| - name: Start dev server | |
| id: devserver | |
| if: steps.route.outputs.dev_server == 'true' | |
| continue-on-error: true | |
| run: | | |
| set -euo pipefail | |
| LOG=/tmp/next-dev.log | |
| echo "log=$LOG" >> "$GITHUB_OUTPUT" | |
| pnpm run dev > "$LOG" 2>&1 & | |
| DEV_PID=$! | |
| echo "pid=$DEV_PID" >> "$GITHUB_OUTPUT" | |
| # Up to 60 attempts, sleeping 2s between them; stop early if the process died. | |
| for i in {1..60}; do | |
| if curl -sSf http://localhost:3000 >/dev/null; then | |
| echo "Dev server is up" | |
| echo "up=true" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| fi | |
| if ! kill -0 "$DEV_PID" 2>/dev/null; then | |
| echo "Dev server process exited early" | |
| break | |
| fi | |
| sleep 2 | |
| done | |
| # Give-up path: record the failure, show the log tail, stop the server process. | |
| echo "Dev server failed to start (best effort; continuing)." | |
| echo "up=false" >> "$GITHUB_OUTPUT" | |
| tail -n 200 "$LOG" || true | |
| kill "$DEV_PID" 2>/dev/null || true | |
| exit 0 | |
| # Build the agent prompt + MCP/tool set from the router decision. One adaptive | |
| # prompt: a dynamic routing header selects which profile rules apply; the browser | |
| # MCP wired matches the decision (Playwright by default, Chrome on request). | |
| # Writes MCP_CONFIG, CLAUDE_TOOLS and the multi-line PROMPT_BODY to $GITHUB_ENV for | |
| # the "Run Claude Code" step. | |
| - name: Compose prompt + tools | |
| id: compose | |
| env: | |
| BROWSER: ${{ steps.route.outputs.browser }} | |
| PROFILE: ${{ steps.route.outputs.profile }} | |
| DEV_SERVER_UP: ${{ steps.devserver.outputs.up }} | |
| DEV_SERVER_LOG: ${{ steps.devserver.outputs.log }} | |
| REPO: ${{ github.repository }} | |
| PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }} | |
| run: | | |
| set -euo pipefail | |
| # Defaults cover the cases where an upstream step was skipped or emitted nothing. | |
| BROWSER="${BROWSER:-none}" | |
| PROFILE="${PROFILE:-code}" | |
| DEV="${DEV_SERVER_UP:-not started}" | |
| LOG="${DEV_SERVER_LOG:-/tmp/next-dev.log}" | |
| # cat_prompt <file>: print the file and, only if its last byte is not a newline, | |
| # add one. Without this a prompt file lacking a trailing newline would glue the | |
| # heredoc terminator onto its last line, so the runner would not find the | |
| # delimiter when parsing $GITHUB_ENV. | |
| cat_prompt() { | |
| cat "$1" | |
| if [ -n "$(tail -c 1 "$1")" ]; then echo; fi | |
| } | |
| # Review profile: dedicated review prompt + narrow gh-only tools, no browser MCP. | |
| # Shares .github/claude/review-prompt.md with the auto-on-PR review job below. | |
| if [ "$PROFILE" = "review" ]; then | |
| { | |
| echo 'MCP_CONFIG={"mcpServers":{}}' | |
| echo 'CLAUDE_TOOLS=mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)' | |
| } >> "$GITHUB_ENV" | |
| { | |
| echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__' | |
| printf 'REPO: %s\nPR NUMBER: %s\n\n' "${REPO:-}" "${PR_NUMBER:-}" | |
| cat_prompt "${GITHUB_WORKSPACE}/.github/claude/review-prompt.md" | |
| echo '__CLAUDE_PROMPT_EOF__' | |
| } >> "$GITHUB_ENV" | |
| exit 0 | |
| fi | |
| # Chrome is the only non-default MCP; everything else gets Playwright (wired even | |
| # for browser=none so the agent can install a browser on demand and drive it). | |
| if [ "$BROWSER" = "chrome" ]; then | |
| WIRED=chrome | |
| MCP_CONFIG='{"mcpServers":{"fetch":{"command":"npx","args":["-y","@anthropic-ai/mcp-server-fetch@latest"]},"chrome":{"command":"npx","args":["-y","chrome-devtools-mcp@latest","--headless=true","--isolated=true"]}}}' | |
| CLAUDE_TOOLS='Write,Edit,Read,Glob,Grep,WebFetch,mcp__github__*,mcp__github_inline_comment__create_inline_comment,mcp__github_ci__*,mcp__fetch__*,mcp__chrome__*,Bash' | |
| else | |
| WIRED=playwright | |
| MCP_CONFIG='{"mcpServers":{"fetch":{"command":"npx","args":["-y","@anthropic-ai/mcp-server-fetch@latest"]},"playwright":{"command":"npx","args":["-y","@playwright/mcp@latest","--headless","--caps=vision"]}}}' | |
| CLAUDE_TOOLS='Write,Edit,Read,Glob,Grep,WebFetch,mcp__github__*,mcp__github_inline_comment__create_inline_comment,mcp__github_ci__*,mcp__fetch__*,mcp__playwright__*,Bash' | |
| fi | |
| { | |
| echo "MCP_CONFIG=$MCP_CONFIG" | |
| echo "CLAUDE_TOOLS=$CLAUDE_TOOLS" | |
| } >> "$GITHUB_ENV" | |
| # Body lives in .github/claude/implement-prompt.md (already checked out) so this | |
| # YAML stays free of embedded prose; only the dynamic routing header is built here. | |
| { | |
| echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__' | |
| printf '## Routing (decided by the task router for this run)\n- profile: %s\n- browser MCP wired: %s (use mcp__%s__* tools when browser verification is needed)\n- dev server: %s (log: %s)\n\nApply the "%s" rules under "Task profiles" below.\n\n' \ | |
| "$PROFILE" "$WIRED" "$WIRED" "$DEV" "$LOG" "$PROFILE" | |
| cat_prompt "${GITHUB_WORKSPACE}/.github/claude/implement-prompt.md" | |
| echo '__CLAUDE_PROMPT_EOF__' | |
| } >> "$GITHUB_ENV" | |
| # No `if: always()` — if setup genuinely failed, skip rather than launch the agent | |
| # with an empty prompt/config. The best-effort dev-server step is continue-on-error, | |
| # so a dev server that didn't come up does not block this step. | |
| # MCP_CONFIG, CLAUDE_TOOLS and PROMPT_BODY read below were written to $GITHUB_ENV by | |
| # the "Compose prompt + tools" step. | |
| - name: Run Claude Code | |
| id: claude | |
| uses: anthropics/claude-code-action@d5726de019ec4498aa667642bc3a80fca83aa102 # v1.0.148 | |
| env: | |
| GH_TOKEN: ${{ secrets.PAT }} | |
| GITHUB_TOKEN: ${{ secrets.PAT }} | |
| # Values are in milliseconds per the variable names (30 min / 60 min); | |
| # presumably consumed by the agent's Bash tool — confirm. | |
| BASH_DEFAULT_TIMEOUT_MS: '1800000' | |
| BASH_MAX_TIMEOUT_MS: '3600000' | |
| # Dev-server state forwarded from the `devserver` step outputs (empty if it was skipped). | |
| DEV_SERVER_UP: ${{ steps.devserver.outputs.up }} | |
| DEV_SERVER_PID: ${{ steps.devserver.outputs.pid }} | |
| DEV_SERVER_LOG: ${{ steps.devserver.outputs.log }} | |
| with: | |
| anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} | |
| # NOTE(review): this input uses secrets.GITHUB_TOKEN while the env above uses | |
| # secrets.PAT — confirm the difference is intentional. | |
| github_token: ${{ secrets.GITHUB_TOKEN }} | |
| trigger_phrase: '@claude' | |
| track_progress: true | |
| allowed_bots: '' | |
| additional_permissions: | | |
| actions: read | |
| claude_args: | | |
| --model "${{ env.CLAUDE_MODEL }}" | |
| --mcp-config '${{ env.MCP_CONFIG }}' | |
| --allowedTools "${{ env.CLAUDE_TOOLS }}" | |
| prompt: ${{ env.PROMPT_BODY }} | |
| # Automatic review on PR open/sync/ready. Comment-triggered reviews (@claude review, | |
| # or any review-phrased ask) are classified by the router in the implement job above | |
| # and share this job's prompt (.github/claude/review-prompt.md). Review-only: narrow tools. | |
| review: | |
| name: Claude (review) | |
| # Gate: pull_request events only, skipping drafts, and only for PR authors whose | |
| # association is in the allow-list. | |
| if: >- | |
| github.event_name == 'pull_request' | |
| && !github.event.pull_request.draft | |
| && contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.pull_request.author_association) | |
| runs-on: ubuntu-latest | |
| # Read-only on contents: this job comments on the PR but does not push. | |
| permissions: | |
| contents: read | |
| pull-requests: write | |
| actions: read | |
| id-token: write | |
| # Same group as the implement job so a comment-triggered review and an auto-review | |
| # on the same PR serialize instead of overlapping with duplicate feedback. | |
| concurrency: | |
| group: claude-${{ github.event.pull_request.number || github.event.issue.number }} | |
| cancel-in-progress: false | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 | |
| with: | |
| fetch-depth: 0 | |
| # Same review prompt the router's `review` profile uses — single source of truth. | |
| # Writes the multi-line PROMPT_BODY to $GITHUB_ENV for the next step. | |
| - name: Compose review prompt | |
| env: | |
| REPO: ${{ github.repository }} | |
| PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }} | |
| run: | | |
| set -euo pipefail | |
| PROMPT_FILE="${GITHUB_WORKSPACE}/.github/claude/review-prompt.md" | |
| { | |
| echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__' | |
| printf 'REPO: %s\nPR NUMBER: %s\n\n' "${REPO:-}" "${PR_NUMBER:-}" | |
| cat "$PROMPT_FILE" | |
| # If the prompt file does not end in a newline, add one so the terminator | |
| # below sits on its own line; otherwise it would be glued to the file's last | |
| # line and the runner would not find the delimiter when parsing $GITHUB_ENV. | |
| if [ -n "$(tail -c 1 "$PROMPT_FILE")" ]; then echo; fi | |
| echo '__CLAUDE_PROMPT_EOF__' | |
| } >> "$GITHUB_ENV" | |
| - name: PR Review with Claude | |
| uses: anthropics/claude-code-action@d5726de019ec4498aa667642bc3a80fca83aa102 # v1.0.148 | |
| with: | |
| anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} | |
| trigger_phrase: '@claude review' | |
| track_progress: true | |
| allowed_bots: '' | |
| claude_args: | | |
| --model "${{ env.CLAUDE_MODEL }}" | |
| --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)" | |
| prompt: ${{ env.PROMPT_BODY }} | |