pytorch · metascroy · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/.ci/scripts/build_macos_wheels.sh b/.ci/scripts/build_macos_wheels.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Build ExecuTorch and its locally-built dependencies (torchao, tokenizers)
+# as Python wheels into the output directory passed as $1.
+#
+# Designed to be invoked from a CI "build artifact" job. The produced wheels
+# can then be uploaded with `upload-artifact:` and consumed by downstream
+# jobs via `install_executorch.sh --prebuilt-wheel-dir <path>`.
+#
+# Caller-controlled environment variables that influence the build (must
+# match downstream consumer expectations):
+#   EXECUTORCH_BUILD_KERNELS_TORCHAO
+#   TORCHAO_BUILD_EXPERIMENTAL_MPS
+#   CMAKE_ARGS
+#   MACOSX_DEPLOYMENT_TARGET  (e.g. 14.0 to make the wheel installable on
+#                              older macOS runners in the same cluster)
+#
+# Usage:
+#   build_macos_wheels.sh <output-dir>
+#
+# Output:
+#   <output-dir>/torchao-*.whl
+#   <output-dir>/pytorch_tokenizers-*.whl
+#   <output-dir>/executorch-*.whl
+
+set -euxo pipefail
+
+if [[ $# -ne 1 ]]; then
+  echo "Usage: $0 <output-dir>" >&2
+  exit 1
+fi
+
+# Create <output-dir> relative to the caller's cwd and pin WHEEL_DIR to its
+# absolute path, so the cd to the repo root below cannot re-anchor it.
+OUTPUT_DIR="$1"
+mkdir -p "${OUTPUT_DIR}"
+WHEEL_DIR="$( cd -- "${OUTPUT_DIR}" &> /dev/null && pwd )"
+SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+REPO_ROOT="$( cd -- "${SCRIPT_DIR}/../.." &> /dev/null && pwd )"
+cd "${REPO_ROOT}"
+
+# Interpreter used by every build/install step in this script.
+PYTHON="${PYTHON_EXECUTABLE:-python}"
+
+# Populate all required submodules before any build runs.
+git submodule sync --recursive
+git submodule update --init --recursive
+
+# torch must be installed before `pip wheel` runs on the local sources, since
+# their build hooks `import torch`. install_requirements.py is the single
+# source of truth for the pinned torch version and requirements-dev.txt.
+#
+# use_pytorch_nightly=True is required. Despite its name, that is the branch
+# which pins torch to a specific version (currently torch==2.11.0). The False
+# branch assumes an earlier docker layer installed torch from a pinned commit,
+# which is NOT true on a fresh macOS conda env: pip would resolve `torch` to
+# whatever is newest (e.g. 2.12.0) and link the executorch wheel against it.
+# Consumer jobs pin torch via install_executorch.sh's default path, so they
+# would run a different torch and hit a libtorch_cpu ABI mismatch
+# (Symbol not found: torch::Library::_def(...c10::headeronly::Tag...)).
+TORCH_SETUP="from install_requirements import install_torch_and_dev_requirements; install_torch_and_dev_requirements(use_pytorch_nightly=True)"
+"${PYTHON}" -c "${TORCH_SETUP}"
+
+# Build torchao wheel. install_requirements.py sets USE_CPP/CMAKE_POLICY_VERSION_MINIMUM
+# based on EXECUTORCH_BUILD_KERNELS_TORCHAO; replicate that here so the produced
+# wheel matches what install_executorch.sh would have built.
+if [[ "${EXECUTORCH_BUILD_KERNELS_TORCHAO:-0}" == "1" ]]; then
+  export USE_CPP=1
+  export CMAKE_POLICY_VERSION_MINIMUM="3.5"
+else
+  export USE_CPP=0
+fi
+
+"${PYTHON}" -m pip wheel \
+  --no-deps \
+  --no-build-isolation \
+  --wheel-dir "${WHEEL_DIR}" \
+  ./third-party/ao
+
+# Install the just-built torchao so the executorch wheel build (which
+# imports torchao at build time in some configurations) succeeds.
+"${PYTHON}" -m pip install "${WHEEL_DIR}"/torchao-*.whl
+
+# Build the tokenizers wheel (parity with install_requirements LOCAL_REQUIREMENTS).
+"${PYTHON}" -m pip wheel \
+  --no-deps \
+  --no-build-isolation \
+  --wheel-dir "${WHEEL_DIR}" \
+  ./extension/llm/tokenizers
+
+"${PYTHON}" -m pip install "${WHEEL_DIR}"/pytorch_tokenizers-*.whl
+
+# Finally, build the executorch wheel. CMAKE_ARGS / EXECUTORCH_BUILD_KERNELS_TORCHAO
+# from the caller's environment are honored by the build backend.
+"${PYTHON}" -m pip wheel \
+  --no-deps \
+  --no-build-isolation \
+  --wheel-dir "${WHEEL_DIR}" \
+  .
+
+echo "Built wheels:"
+ls -lah "${WHEEL_DIR}"
diff --git a/.ci/scripts/install_executorch_from_wheels.sh b/.ci/scripts/install_executorch_from_wheels.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Consume prebuilt ExecuTorch wheels produced by build_macos_wheels.sh and
+# downloaded into ${RUNNER_ARTIFACT_DIR} via the macos_job.yml
+# `download-artifact:` input, then run install_executorch.sh against them.
+#
+# This script:
+#   1. Moves the downloaded *.whl files out of ${RUNNER_ARTIFACT_DIR} so a
+#      subsequent `upload-artifact:` from the same job does not re-upload
+#      them as part of an unrelated artifact (e.g. an exported .pte).
+#   2. Invokes install_executorch.sh --prebuilt-wheel-dir <moved-dir>,
+#      forwarding any additional flags after the optional --.
+#
+# Usage:
+#   install_executorch_from_wheels.sh [-- <extra install_executorch.sh flags>]
+#
+# Environment:
+#   RUNNER_ARTIFACT_DIR  (required; set by pytorch/test-infra macos_job.yml)
+#   CONDA_RUN            (optional; used as conda env wrapper if present)
+#
+# Notes:
+#   - Honors EXECUTORCH_BUILD_KERNELS_TORCHAO / TORCHAO_BUILD_EXPERIMENTAL_MPS
+#     etc., but those should match the values used at wheel-build time.
+#   - The repo root must be the current working directory when invoked.
+
+set -euxo pipefail
+
+if [[ -z "${RUNNER_ARTIFACT_DIR:-}" ]]; then
+  echo "ERROR: RUNNER_ARTIFACT_DIR is not set." >&2
+  exit 1
+fi
+WHEEL_DIR="${RUNNER_TEMP:-/tmp}/prebuilt_executorch_wheels"
+mkdir -p "${WHEEL_DIR}"
+shopt -s nullglob
+WHEELS=( "${RUNNER_ARTIFACT_DIR}"/*.whl )
+shopt -u nullglob
+if [[ ${#WHEELS[@]} -eq 0 ]]; then
+  echo "ERROR: no *.whl files found in ${RUNNER_ARTIFACT_DIR}." >&2
+  echo "Did the consumer job set download-artifact correctly?" >&2
+  exit 1
+fi
+# WHEEL_DIR is a fixed path, so purge wheels staged by an earlier run before
+# moving this job's wheels out of the artifact dir (keeps them from re-upload).
+rm -f -- "${WHEEL_DIR}"/*.whl
+mv -v "${WHEELS[@]}" "${WHEEL_DIR}/"
+
+EXTRA_ARGS=()
+if [[ $# -gt 0 ]]; then
+  if [[ "$1" == "--" ]]; then
+    shift
+  fi
+  EXTRA_ARGS=( "$@" )
+fi
+
+# Forward to install_executorch.sh. Honor ${CONDA_RUN} if set (matches the
+# convention used in metal.yml and friends).
+#
+# NOTE: The macos_job.yml reusable workflow runs scripts with `set -u`, which
+# makes "${EXTRA_ARGS[@]}" error when the array is empty ("unbound variable").
+# Use the `${arr[@]+"${arr[@]}"}` idiom to expand to nothing in that case.
+if [[ -n "${CONDA_RUN:-}" ]]; then
+  ${CONDA_RUN} ./install_executorch.sh \
+    --prebuilt-wheel-dir "${WHEEL_DIR}" \
+    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
+else
+  ./install_executorch.sh \
+    --prebuilt-wheel-dir "${WHEEL_DIR}" \
+    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
+fi
diff --git a/.github/workflows/metal.yml b/.github/workflows/metal.yml
@@ -22,6 +22,9 @@
 
 jobs:
   test-metal-builds:
+    # Standalone canary that exercises the EXECUTORCH_BUILD_METAL=ON build
+    # mode. This config is unique to this job (no TORCHAO/MPS), so it stays
+    # outside the shared-wheel pipeline below.
     name: test-executorch-metal-build
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
@@ -38,8 +41,40 @@
         PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_METAL=ON" ${CONDA_RUN} --no-capture-output ./install_executorch.sh
         echo "::endgroup::"
 
+  # Build the executorch, torchao and tokenizers wheels once, with the
+  # Metal/torchao-MPS flags set in the script below, instead of once per test
+  # job. The wheels are written to ${RUNNER_ARTIFACT_DIR}, uploaded as a single
+  # artifact, and fetched by the consuming jobs via `download-artifact:`.
+  build-metal-wheels:
+    name: build-metal-wheels
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    with:
+      default-packages: ""
+      runner: macos-m2-stable
+      python-version: '3.11'
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      # The artifact name spells out OS, arch and Python version so that a
+      # consumer requesting a mismatched name fails loudly instead of silently
+      # using a wrong-platform wheel. Keep it in sync with every consumer.
+      upload-artifact: executorch-metal-wheels-macos14-arm64-py311
+      script: |
+        set -eux
+
+        echo "::group::Build prebuilt Metal wheels (executorch + torchao + tokenizers)"
+        PYTHON_EXECUTABLE=python ${CONDA_RUN} \
+          EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 \
+          bash .ci/scripts/build_macos_wheels.sh "${RUNNER_ARTIFACT_DIR}"
+        echo "::endgroup::"
+
+        echo "::group::Wheel inventory"
+        ls -lah "${RUNNER_ARTIFACT_DIR}"
+        echo "::endgroup::"
+
+
   test-metal-modules:
     name: test-metal-backend-modules
+    needs: build-metal-wheels
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       default-packages: ""
@@ -48,11 +83,15 @@
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 120
+      download-artifact: executorch-metal-wheels-macos14-arm64-py311
       script: |
         set -eux
 
-        echo "::group::Setup ExecuTorch"
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 ./install_executorch.sh
+        echo "::group::Setup ExecuTorch (from prebuilt wheels)"
+        # Build-time flags (EXECUTORCH_BUILD_KERNELS_TORCHAO,
+        # TORCHAO_BUILD_EXPERIMENTAL_MPS, etc.) are baked into the prebuilt
+        # wheels by the build-metal-wheels job and are NOT re-applied here.
+        bash .ci/scripts/install_executorch_from_wheels.sh
         echo "::endgroup::"
 
         echo "::group::Build Metal Runtime"
@@ -65,6 +104,7 @@
 
   test-metal-qwen35-moe-tiny:
     name: test-metal-qwen35-moe-tiny
+    needs: build-metal-wheels
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       default-packages: ""
@@ -73,11 +113,13 @@
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 120
+      download-artifact: executorch-metal-wheels-macos14-arm64-py311
       script: |
         set -eux
 
-        echo "::group::Setup ExecuTorch"
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 ./install_executorch.sh
+        echo "::group::Setup ExecuTorch (from prebuilt wheels)"
+        # Build-time flags are baked into the prebuilt wheels by build-metal-wheels.
+        bash .ci/scripts/install_executorch_from_wheels.sh
         echo "::endgroup::"
 
         # Isolate Inductor cache per job to prevent PCH conflicts
@@ -164,6 +206,7 @@
     name: export-model-metal-artifact
     # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
     if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
+    needs: build-metal-wheels
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     strategy:
@@ -197,6 +240,7 @@
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
       secrets-env: EXECUTORCH_HF_TOKEN
+      download-artifact: executorch-metal-wheels-macos14-arm64-py311
       upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-metal-${{ matrix.quant }}
       script: |
         set -eux
@@ -212,8 +256,15 @@
         ${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
         echo "::endgroup::"
 
-        echo "::group::Setup ExecuTorch"
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 ./install_executorch.sh
+        echo "::group::Setup ExecuTorch (from prebuilt wheels)"
+        # install_executorch_from_wheels.sh moves the downloaded *.whl out of
+        # ${RUNNER_ARTIFACT_DIR} so the subsequent .pte upload only contains
+        # the exported model. We re-add wheels under wheels/ at the end so
+        # the downstream e2e job (which only has one download-artifact slot)
+        # can install from the same artifact it pulls the .pte from.
+        # Build-time flags are baked into the prebuilt wheels by build-metal-wheels.
+        bash .ci/scripts/install_executorch_from_wheels.sh
+        WHEEL_CACHE="${RUNNER_TEMP}/prebuilt_executorch_wheels"
         echo "::endgroup::"
 
         echo "::group::Pip List"
@@ -228,9 +279,17 @@
         export TORCHINDUCTOR_CACHE_DIR=$(mktemp -d "${RUNNER_TEMP}/inductor_cache_XXXXXX")
         ${CONDA_RUN} bash .ci/scripts/export_model_artifact.sh metal "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"
 
+        echo "::group::Bundle prebuilt wheels into export artifact"
+        # Re-emit the prebuilt wheels under wheels/ inside RUNNER_ARTIFACT_DIR
+        # so the e2e consumer can install ExecuTorch from them without needing
+        # a second download-artifact slot.
+        mkdir -p "${RUNNER_ARTIFACT_DIR}/wheels"
+        cp -v "${WHEEL_CACHE}"/*.whl "${RUNNER_ARTIFACT_DIR}/wheels/"
+        echo "::endgroup::"
+
   test-model-metal-e2e:
     name: test-model-metal-e2e
-    needs: export-model-metal-artifact
+    needs: [build-metal-wheels, export-model-metal-artifact]
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     strategy:
       fail-fast: false
@@ -262,6 +321,9 @@
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
+      # The export artifact carries both the .pte and a wheels/ subdir
+      # containing the prebuilt ExecuTorch wheels. macos_job.yml's single
+      # download-artifact slot is therefore enough for both inputs.
       download-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-metal-${{ matrix.quant }}
       script: |
         set -eux
@@ -290,4 +352,23 @@
         fi
         echo "::endgroup::"
 
+        echo "::group::Setup ExecuTorch (from prebuilt wheels in export artifact)"
+        WHEEL_DIR="${RUNNER_TEMP}/prebuilt_executorch_wheels"
+        mkdir -p "${WHEEL_DIR}"
+        # Move bundled wheels out of RUNNER_ARTIFACT_DIR so test_model_e2e.sh
+        # only sees the .pte it expects there.
+        mv -v "${RUNNER_ARTIFACT_DIR}/wheels"/*.whl "${WHEEL_DIR}/"
+        rmdir "${RUNNER_ARTIFACT_DIR}/wheels"
+        ls -lah "${WHEEL_DIR}"
+        # --minimal: skip install_optional_example_requirements (transformers,
+        # datasets==3.6.0, torchsr, torchtune, timm). The e2e runner build and
+        # test_model_e2e.sh's audio/image setup do not need any of those, and
+        # pre-installing the pinned datasets==3.6.0 collides with
+        # test_model_e2e.sh's own `pip install datasets` (no pin) — the older
+        # pinned version requires librosa for librispeech audio decode, which
+        # is not in any requirements file. Matches the original test-model-
+        # metal-e2e behavior, which did not run install_executorch.sh at all.
+        ${CONDA_RUN} ./install_executorch.sh --prebuilt-wheel-dir "${WHEEL_DIR}" --minimal
+        echo "::endgroup::"
+
         ${CONDA_RUN} bash .ci/scripts/test_model_e2e.sh metal "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"