feat(ci): route @claude tasks via Haiku, provision env on demand #1211

Workflow file for this run

	# Claude automation workflow. Comment and issue events are handled by the
	# `implement` job; pull_request events are handled by the `review` job (each job's
	# `if:` filters on github.event_name).
	name: Claude

	on:
	issue_comment:
	types: [created]
	issues:
	types: [opened, assigned]
	pull_request_review_comment:
	types: [created]
	pull_request:
	types: [opened, synchronize, ready_for_review]

	# Single source of truth for the models. Bumping either is a one-line change.
	# CLAUDE_MODEL is passed to the agent via `--model`; CLAUDE_ROUTER_MODEL is the
	# model the "Route task" step sends its classification request to.
	env:
	CLAUDE_MODEL: claude-opus-4-8
	CLAUDE_ROUTER_MODEL: claude-haiku-4-5

	jobs:
	# @claude <anything> in an issue/comment. A Haiku router classifies the request —
	# including review asks — and provisions only the environment it needs (browser/dev
	# server installed on demand), then runs the matching prompt. The review job below
	# handles only the automatic on-PR-open/sync review (no comment to route).
	implement:
	name: Claude (implement)
	# Gate: either a comment event (issue comment or PR review comment) whose body
	# mentions @claude, or an issues event whose body or title mentions @claude —
	# and in both cases the author's association must be in the allow-list.
	# NOTE(review): "BOT" does not appear to be one of GitHub's documented
	# author_association values — confirm this entry can ever match.
	if: >-
	((github.event_name == 'issue_comment' \|\| github.event_name == 'pull_request_review_comment')
	&& contains(github.event.comment.body \|\| '', '@claude')
	&& contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.comment.author_association))
	\|\|
	(github.event_name == 'issues'
	&& (contains(github.event.issue.body \|\| '', '@claude') \|\| contains(github.event.issue.title \|\| '', '@claude'))
	&& contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.issue.author_association))
	runs-on: ubuntu-latest
	# Job-wide environment, visible to every step. GITHUB_TOKEN is set to the PAT
	# secret here rather than the workflow's default token.
	env:
	GITHUB_TOKEN: ${{ secrets.PAT }}
	VERCEL_GIT_COMMIT_REF: claude/${{ github.ref_name }}
	CHROME_DEVTOOLS_MCP_NO_USAGE_STATISTICS: '1'
	DATABASE_READONLY_URL: ${{ secrets.DATABASE_READONLY_URL }}
	DATABASE_DRIVER: neon
	DATABASE_SSL: 'true'
	permissions:
	contents: write
	pull-requests: write
	issues: write
	actions: read
	id-token: write
	# Serialize @claude runs on the same issue/PR so back-to-back comments (incl. reviews)
	# don't clobber each other — matches the protection the standalone review job had.
	# The group key is the PR number when the event carries one, else the issue number.
	concurrency:
	group: claude-${{ github.event.pull_request.number \|\| github.event.issue.number }}
	cancel-in-progress: false

	steps:
	# Decide the run shape from the request text. Explicit keywords win deterministically;
	# otherwise Haiku classifies. Any failure falls back to the lean path (no browser/dev
	# server), which the agent can still self-provision on demand.
	#
	# Step outputs (steps.route.outputs.*):
	#   browser    - playwright | chrome | none
	#   dev_server - true | false
	#   profile    - ui | code | docs | question | review
	- name: Route task
	  id: route
	  env:
	    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	    ROUTER_MODEL: ${{ env.CLAUDE_ROUTER_MODEL }}
	    BODY: ${{ github.event.comment.body || github.event.issue.body || '' }}
	    TITLE: ${{ github.event.issue.title || '' }}
	  run: |
	    set -euo pipefail

	    # Title and body are passed through env (never interpolated into the script)
	    # and classified together.
	    TEXT="$(printf '%s\n%s' "${TITLE:-}" "${BODY:-}")"

	    # emit <browser> <dev_server> <profile> <source>
	    # Writes the three step outputs and logs the decision plus what produced it.
	    emit() {
	      {
	        echo "browser=$1"
	        echo "dev_server=$2"
	        echo "profile=$3"
	      } >> "$GITHUB_OUTPUT"
	      echo "Routed: profile=$3 browser=$1 dev_server=$2 (source: $4)"
	    }

	    # mentions <ere-pattern>: true when the request text matches, case-insensitively.
	    # Fed by a here-string instead of `printf ... | grep -q`: under pipefail, grep -q
	    # exiting at the first match can SIGPIPE the writer on a large body, which would
	    # turn a real match into a failed pipeline and skip the explicit override.
	    mentions() {
	      grep -qiE -- "$1" <<<"$TEXT"
	    }

	    # 1. Explicit overrides — no API call.
	    if mentions '@claude review'; then
	      emit none false review "explicit @claude review"; exit 0
	    fi
	    if mentions '@claude chrome'; then
	      emit chrome true ui "explicit @claude chrome"; exit 0
	    fi
	    if mentions '@claude frontend'; then
	      emit playwright true ui "explicit @claude frontend"; exit 0
	    fi
	    if mentions '@claude (general|lite)'; then
	      emit none false code "explicit @claude general"; exit 0
	    fi

	    # 2. Classify with Haiku. Lean fallback on any problem.
	    if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
	      emit none false code "no ANTHROPIC_API_KEY"; exit 0
	    fi

	    SYSTEM='You are a routing classifier for an automated engineering bot on InferenceX, a Next.js dashboard for ML inference benchmarks. Given a GitHub issue/comment, decide the execution environment. Respond with ONLY a compact JSON object and nothing else: {"profile":"ui|code|docs|question|review","browser":"playwright|chrome|none"}. Guidance: A request to review, critique, or give feedback on a pull request or its diff (e.g. "review this", "take a look at the PR", "any issues with this change?") -> profile=review, browser=none. UI / chart / D3 / React component / styling / .tsx / visual work -> profile=ui, browser=playwright (use browser=chrome ONLY if the user explicitly asks for Chrome DevTools / network / console debugging). Backend / DB / ETL / ingest / scripts / lib / API-route logic -> profile=code, browser=none. Docs / markdown / blog content / config / CI workflow files -> profile=docs, browser=none. A pure question or code explanation needing no file change -> profile=question, browser=none. When unsure between ui and code, choose code.'

	    # jq builds the request so the user text is JSON-escaped, not string-spliced.
	    REQ="$(jq -n --arg m "${ROUTER_MODEL:-claude-haiku-4-5}" --arg s "$SYSTEM" --arg u "$TEXT" \
	      '{model:$m, max_tokens:300, system:$s, messages:[{role:"user", content:$u}]}' 2>/dev/null || true)"
	    if [ -z "$REQ" ]; then
	      emit none false code "request build failed"; exit 0
	    fi

	    # Transport or HTTP errors leave RESP empty or without .content; both fall
	    # through to the lean default below instead of failing the step.
	    RESP="$(curl -sS --max-time 60 https://api.anthropic.com/v1/messages \
	      -H "x-api-key: ${ANTHROPIC_API_KEY}" \
	      -H "anthropic-version: 2023-06-01" \
	      -H "content-type: application/json" \
	      -d "$REQ" 2>/dev/null || true)"

	    OUT="$(printf '%s' "$RESP" | jq -r '.content[0].text // empty' 2>/dev/null || true)"
	    # Strip markdown code fences in case the model wrapped its JSON in them.
	    CLEAN="$(printf '%s' "$OUT" | sed 's/```json//g; s/```//g')"
	    PROFILE="$(printf '%s' "$CLEAN" | jq -r '.profile // empty' 2>/dev/null || true)"
	    BROWSER="$(printf '%s' "$CLEAN" | jq -r '.browser // empty' 2>/dev/null || true)"

	    # Anything outside the known vocabularies is replaced with the lean default, and
	    # the log says so rather than attributing the default to the classifier.
	    DECIDED_BY=haiku
	    case "$PROFILE" in
	      ui|code|docs|question|review) ;;
	      *) PROFILE=code; DECIDED_BY="haiku response unusable; lean fallback" ;;
	    esac
	    case "$BROWSER" in
	      playwright|chrome|none) ;;
	      *) BROWSER=none ;;
	    esac

	    # Keep browser coherent with profile so the provisioned setup matches the prompt's
	    # definition-of-done. A ui task must get a browser (default Playwright if the
	    # classifier omitted it or returned none); every non-ui profile gets none, so it
	    # never pre-provisions a browser/dev server it won't use.
	    case "$PROFILE" in
	      ui) if [ "$BROWSER" = "none" ]; then BROWSER=playwright; fi ;;
	      *) BROWSER=none ;;
	    esac

	    # Dev server is provisioned iff a browser is — you can't verify in a browser
	    # without the app running, and non-browser tasks don't need it.
	    if [ "$BROWSER" = "none" ]; then DEVSERVER=false; else DEVSERVER=true; fi

	    emit "$BROWSER" "$DEVSERVER" "$PROFILE" "$DECIDED_BY"

	# Full-history checkout (fetch-depth: 0), authenticated with the PAT secret.
	- name: Checkout repository
	uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
	with:
	fetch-depth: 0
	token: ${{ secrets.PAT }}

	- name: Setup pnpm
	uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6.0.8

	# Node 24 with the action's pnpm cache enabled.
	- name: Setup Node.js
	uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
	with:
	node-version: '24'
	cache: 'pnpm'

	# Always restore the browser/binary caches (a restore is a near-noop). This makes
	# any on-demand `playwright install` / `cypress install` below a fast cache hit.
	# Both keys are derived from the runner OS and the hash of pnpm-lock.yaml, with an
	# OS-only prefix as the fallback restore key.
	# NOTE(review): these steps use actions/cache (not a restore-only variant), so they
	# presumably also save on a miss — confirm the "restore" wording is intended.
	- name: Restore Playwright cache
	id: playwright-cache
	uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	with:
	path: ~/.cache/ms-playwright
	key: playwright-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }}
	restore-keys: \|
	playwright-${{ runner.os }}-

	- name: Restore Cypress cache
	id: cypress-cache
	uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
	with:
	path: ~/.cache/Cypress
	key: cypress-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }}
	restore-keys: \|
	cypress-${{ runner.os }}-

	# Skipped when the router chose the review profile.
	- name: Install dependencies
	if: steps.route.outputs.profile != 'review'
	run: pnpm install --frozen-lockfile

	# --- On-demand setup, gated on the router decision ---

	# Runs only when the router output browser=playwright.
	- name: Install Playwright browser
	if: steps.route.outputs.browser == 'playwright'
	run: npx -y playwright install --with-deps chromium

	# Runs only when the router output browser=chrome. Prints the version of whichever
	# of google-chrome / google-chrome-stable is on PATH; fails the step if neither is.
	- name: Verify Chrome is available
	if: steps.route.outputs.browser == 'chrome'
	run: \|
	set -euo pipefail
	if command -v google-chrome >/dev/null 2>&1; then
	google-chrome --version
	elif command -v google-chrome-stable >/dev/null 2>&1; then
	google-chrome-stable --version
	else
	echo "ERROR: Chrome not found on runner."
	echo "Install Chrome stable or set --executable-path for chrome-devtools-mcp."
	exit 1
	fi

	# Runs only for the ui profile, and only when the Cypress cache step did not
	# report cache-hit == 'true'.
	- name: Install Cypress binary
	if: steps.route.outputs.profile == 'ui' && steps.cypress-cache.outputs.cache-hit != 'true'
	run: pnpm --filter @semianalysisai/inferencex-app exec cypress install

	# Best-effort dev server, started only when the router output dev_server=true.
	# Launches `pnpm run dev` in the background with output redirected to
	# /tmp/next-dev.log, then polls http://localhost:3000 up to 60 times with a 2s
	# sleep between attempts, stopping early if the background process has exited.
	# Step outputs: log (log file path), pid (background PID), up (true | false).
	# Every explicit exit in the script is 0, and the step is also continue-on-error,
	# so a server that never comes up does not fail the job; on that path the last
	# 200 log lines are printed and the process is killed.
	- name: Start dev server
	id: devserver
	if: steps.route.outputs.dev_server == 'true'
	continue-on-error: true
	run: \|
	set -euo pipefail

	LOG=/tmp/next-dev.log
	echo "log=$LOG" >> "$GITHUB_OUTPUT"

	pnpm run dev > "$LOG" 2>&1 &
	DEV_PID=$!
	echo "pid=$DEV_PID" >> "$GITHUB_OUTPUT"

	for i in {1..60}; do
	if curl -sSf http://localhost:3000 >/dev/null; then
	echo "Dev server is up"
	echo "up=true" >> "$GITHUB_OUTPUT"
	exit 0
	fi
	if ! kill -0 "$DEV_PID" 2>/dev/null; then
	echo "Dev server process exited early"
	break
	fi
	sleep 2
	done

	echo "Dev server failed to start (best effort; continuing)."
	echo "up=false" >> "$GITHUB_OUTPUT"
	tail -n 200 "$LOG" \|\| true
	kill "$DEV_PID" 2>/dev/null \|\| true
	exit 0

	# Build the agent prompt + MCP/tool set from the router decision. One adaptive
	# prompt: a dynamic routing header selects which profile rules apply; the browser
	# MCP wired matches the decision (Playwright by default, Chrome on request).
	#
	# Writes three variables to $GITHUB_ENV for the agent step:
	#   MCP_CONFIG   - JSON passed to --mcp-config
	#   CLAUDE_TOOLS - comma-separated allow-list passed to --allowedTools
	#   PROMPT_BODY  - multiline prompt (dynamic header + checked-in prompt file)
	- name: Compose prompt + tools
	  id: compose
	  env:
	    BROWSER: ${{ steps.route.outputs.browser }}
	    PROFILE: ${{ steps.route.outputs.profile }}
	    DEV_SERVER_UP: ${{ steps.devserver.outputs.up }}
	    DEV_SERVER_LOG: ${{ steps.devserver.outputs.log }}
	    REPO: ${{ github.repository }}
	    PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
	  run: |
	    set -euo pipefail
	    # Defaults cover a skipped dev-server step (empty outputs) and an empty route.
	    BROWSER="${BROWSER:-none}"
	    PROFILE="${PROFILE:-code}"
	    DEV="${DEV_SERVER_UP:-not started}"
	    LOG="${DEV_SERVER_LOG:-/tmp/next-dev.log}"

	    # Review profile: dedicated review prompt + narrow gh-only tools, no browser MCP.
	    # Shares .github/claude/review-prompt.md with the auto-on-PR review job below.
	    # Every Bash rule carries the `:*` prefix wildcard so `gh pr comment`, `gh pr diff`
	    # and `gh pr view` are all allowed with arguments, matching the review job's list.
	    if [ "$PROFILE" = "review" ]; then
	      {
	        echo 'MCP_CONFIG={"mcpServers":{}}'
	        echo 'CLAUDE_TOOLS=mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)'
	      } >> "$GITHUB_ENV"
	      {
	        echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__'
	        printf 'REPO: %s\nPR NUMBER: %s\n\n' "${REPO:-}" "${PR_NUMBER:-}"
	        cat "${GITHUB_WORKSPACE}/.github/claude/review-prompt.md"
	        # Force the closing delimiter onto its own line even if the prompt file has
	        # no trailing newline; otherwise the multiline value is never terminated.
	        printf '\n'
	        echo '__CLAUDE_PROMPT_EOF__'
	      } >> "$GITHUB_ENV"
	      exit 0
	    fi

	    # Chrome is the only non-default MCP; everything else gets Playwright (wired even
	    # for browser=none so the agent can install a browser on demand and drive it).
	    if [ "$BROWSER" = "chrome" ]; then
	      WIRED=chrome
	      MCP_CONFIG='{"mcpServers":{"fetch":{"command":"npx","args":["-y","@anthropic-ai/mcp-server-fetch@latest"]},"chrome":{"command":"npx","args":["-y","chrome-devtools-mcp@latest","--headless=true","--isolated=true"]}}}'
	    else
	      WIRED=playwright
	      MCP_CONFIG='{"mcpServers":{"fetch":{"command":"npx","args":["-y","@anthropic-ai/mcp-server-fetch@latest"]},"playwright":{"command":"npx","args":["-y","@playwright/mcp@latest","--headless","--caps=vision"]}}}'
	    fi
	    # Same allow-list for both branches apart from the wired browser server. The
	    # `mcp__<server>__*` wildcards allow every tool of that server, consistent with
	    # the `mcp__%s__*` hint given to the agent in the routing header below.
	    CLAUDE_TOOLS="Write,Edit,Read,Glob,Grep,WebFetch,mcp__github__*,mcp__github_inline_comment__create_inline_comment,mcp__github_ci__*,mcp__fetch__*,mcp__${WIRED}__*,Bash"
	    {
	      echo "MCP_CONFIG=$MCP_CONFIG"
	      echo "CLAUDE_TOOLS=$CLAUDE_TOOLS"
	    } >> "$GITHUB_ENV"

	    # Body lives in .github/claude/implement-prompt.md (already checked out) so this
	    # YAML stays free of embedded prose; only the dynamic routing header is built here.
	    {
	      echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__'
	      printf '## Routing (decided by the task router for this run)\n- profile: %s\n- browser MCP wired: %s (use mcp__%s__* tools when browser verification is needed)\n- dev server: %s (log: %s)\n\nApply the "%s" rules under "Task profiles" below.\n\n' \
	        "$PROFILE" "$WIRED" "$WIRED" "$DEV" "$LOG" "$PROFILE"
	      cat "${GITHUB_WORKSPACE}/.github/claude/implement-prompt.md"
	      # Same guard as the review branch: keep the delimiter on its own line.
	      printf '\n'
	      echo '__CLAUDE_PROMPT_EOF__'
	    } >> "$GITHUB_ENV"

	# No `if: always()` — if setup genuinely failed, skip rather than launch the agent
	# with an empty prompt/config. The best-effort dev-server step is continue-on-error,
	# so a dev server that didn't come up does not block this step.
	# MCP_CONFIG, CLAUDE_TOOLS and PROMPT_BODY are read from the environment that the
	# "Compose prompt + tools" step wrote to $GITHUB_ENV.
	- name: Run Claude Code
	id: claude
	uses: anthropics/claude-code-action@d5726de019ec4498aa667642bc3a80fca83aa102 # v1.0.148
	# The dev-server step outputs are forwarded to the agent's environment; they are
	# empty when that step was skipped.
	env:
	GH_TOKEN: ${{ secrets.PAT }}
	GITHUB_TOKEN: ${{ secrets.PAT }}
	BASH_DEFAULT_TIMEOUT_MS: '1800000'
	BASH_MAX_TIMEOUT_MS: '3600000'
	DEV_SERVER_UP: ${{ steps.devserver.outputs.up }}
	DEV_SERVER_PID: ${{ steps.devserver.outputs.pid }}
	DEV_SERVER_LOG: ${{ steps.devserver.outputs.log }}
	# NOTE(review): the `github_token` input uses secrets.GITHUB_TOKEN while the env
	# block above uses secrets.PAT for GH_TOKEN/GITHUB_TOKEN — confirm the split is
	# intentional.
	with:
	anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
	github_token: ${{ secrets.GITHUB_TOKEN }}
	trigger_phrase: '@claude'
	track_progress: true
	allowed_bots: ''

	additional_permissions: \|
	actions: read

	claude_args: \|
	--model "${{ env.CLAUDE_MODEL }}"
	--mcp-config '${{ env.MCP_CONFIG }}'
	--allowedTools "${{ env.CLAUDE_TOOLS }}"

	prompt: ${{ env.PROMPT_BODY }}

	# Automatic review on PR open/sync/ready. Comment-triggered reviews (@claude review,
	# or any review-phrased ask) are classified by the router in the implement job above
	# and share this job's prompt (.github/claude/review-prompt.md). Review-only: narrow tools.
	review:
	name: Claude (review)
	# Gate: pull_request events only, non-draft PRs, author association in the
	# allow-list.
	# NOTE(review): "BOT" does not appear to be one of GitHub's documented
	# author_association values — confirm this entry can ever match.
	if: >-
	github.event_name == 'pull_request'
	&& !github.event.pull_request.draft
	&& contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "BOT"]'), github.event.pull_request.author_association)
	runs-on: ubuntu-latest
	# Read-only on repository contents; write access only to pull requests.
	permissions:
	contents: read
	pull-requests: write
	actions: read
	id-token: write
	# Same group as the implement job so a comment-triggered review and an auto-review
	# on the same PR serialize instead of overlapping with duplicate feedback.
	concurrency:
	group: claude-${{ github.event.pull_request.number \|\| github.event.issue.number }}
	cancel-in-progress: false
	steps:
	# Full-history checkout (fetch-depth: 0); no token override in this job.
	- name: Checkout repository
	uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
	with:
	fetch-depth: 0
	# Same review prompt the router's `review` profile uses — single source of truth.
	# Writes PROMPT_BODY to $GITHUB_ENV as a multiline value: a two-line REPO / PR NUMBER
	# header followed by the contents of .github/claude/review-prompt.md.
	- name: Compose review prompt
	  env:
	    REPO: ${{ github.repository }}
	    PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
	  run: |
	    set -euo pipefail
	    {
	      echo 'PROMPT_BODY<<__CLAUDE_PROMPT_EOF__'
	      printf 'REPO: %s\nPR NUMBER: %s\n\n' "${REPO:-}" "${PR_NUMBER:-}"
	      cat "${GITHUB_WORKSPACE}/.github/claude/review-prompt.md"
	      # Force the closing delimiter onto its own line even if the prompt file has
	      # no trailing newline; otherwise the multiline value is never terminated.
	      printf '\n'
	      echo '__CLAUDE_PROMPT_EOF__'
	    } >> "$GITHUB_ENV"

	# Runs the review agent with the PROMPT_BODY composed by the previous step and a
	# narrow allow-list: the inline-comment MCP tool plus three `gh pr` subcommands.
	# Each Bash rule carries the `:*` prefix wildcard so the command is allowed with
	# arguments; previously only `gh pr view` had it.
	- name: PR Review with Claude
	  uses: anthropics/claude-code-action@d5726de019ec4498aa667642bc3a80fca83aa102  # v1.0.148
	  with:
	    anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
	    trigger_phrase: '@claude review'
	    track_progress: true
	    allowed_bots: ''

	    claude_args: |
	      --model "${{ env.CLAUDE_MODEL }}"
	      --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)"

	    prompt: ${{ env.PROMPT_BODY }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

feat(ci): route @claude tasks via Haiku, provision env on demand #1211

Workflow file

feat(ci): route @claude tasks via Haiku, provision env on demand #1211

Uh oh!

Workflow file for this run