Pipeline the firmware queue: concurrent compile + upload lanes #3319
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Test

# CI entry point: lint + tests for every pull request and every push to
# main. Follows the matter-server / music-assistant layout — a single
# ``lint`` job executing the same pre-commit hooks contributors run
# locally, and a ``test`` matrix over the supported Python versions.
# The catalog smoke test (``script/check_catalog.py``) rides along with
# lint so that a bad sync result breaks CI even if nobody ran the full
# sync.

permissions:
  contents: read

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_call:
    # Allows release.yml to use the complete lint + test matrix as a
    # preflight for the branch it is about to release from.
    inputs:
      ref:
        description: "Git ref to check out (defaults to main)."
        type: string
        required: false
        default: main
| jobs: | |
lint:
  name: Lint + smoke checks
  runs-on: ubuntu-latest
  steps:
    - name: Check out code from GitHub
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Set up Python
      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
      with:
        python-version: "3.13"

    # uv stands in for pip during installation: about an order of
    # magnitude faster on cold boots, and it keeps its own wheel cache.
    # The interpreter itself comes from ``actions/setup-python``; that
    # Python is not marked externally-managed, so ``uv pip install
    # --system`` also works on macos / windows runners where a
    # brew-shipped Python would be refused under PEP 668.
    - name: Set up uv
      uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b  # v8.1.0
      with:
        enable-cache: true

    # The ``[esphome]`` extra lets the catalog smoke test further down
    # build a ``ComponentCatalog`` against the esphome version the
    # dashboard ships with. ``--system`` targets the runner's Python
    # rather than a venv, which keeps the pip-era CI shape: the later
    # ``pre-commit`` / ``python script/...`` steps need no ``uv run``
    # prefix.
    - name: Install package + dev tools
      run: uv pip install --system -e '.[esphome,test]'

    # The key combines ``env.pythonLocation`` with the hash of
    # ``.pre-commit-config.yaml``, so bumping any hook invalidates the
    # cache automatically. ``pre-commit/action`` does the same thing
    # internally; it is inlined because that action's transitive
    # ``actions/cache@v4`` reference is not SHA-pinned, which org
    # policy blocks ("all actions must be pinned to a full-length
    # commit SHA").
    - name: Cache pre-commit hook envs
      uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
      with:
        path: ~/.cache/pre-commit
        key: "pre-commit|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}"

    # With a warm cache the per-hook envs (ruff, codespell, …) are
    # already on disk, turning the former ~30s init phase into a
    # near-instant restore. ``no-commit-to-branch`` only makes sense
    # locally — CI always runs on a branch — so it is skipped, as
    # matter-server does.
    - name: Run pre-commit (ruff lint + format, codespell, yaml/json checks)
      run: SKIP=no-commit-to-branch pre-commit run --all-files --show-diff-on-failure --color=always

    - name: Validate board / component manifests
      run: python script/validate_definitions.py

    # Guards against PRs that bypassed the pre-commit hook: the JSON is
    # regenerated from the YAMLs and any difference from the committed
    # file fails the build.
    - name: Verify boards.json is in sync with manifests
      run: |
        python script/sync_boards.py
        git diff --exit-code -- esphome_device_builder/definitions/boards.json

    - name: Smoke-test component catalog
      run: python script/check_catalog.py

    # Mypy runs with the strict settings from ``pyproject.toml``
    # (``disallow_untyped_defs``, ``disallow_incomplete_defs``,
    # ``warn_return_any``) and is a hard gate: a typing regression
    # blocks the PR. It began as advisory (#481) while the 24-error
    # baseline was driven to zero over PRs #483-#492, then became
    # mandatory once the standing count reached zero.
    - name: Type-check (mypy)
      run: mypy esphome_device_builder
test:
  name: Pytest (${{ matrix.os }} / Python ${{ matrix.python-version }})
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      # Linux is exercised on 3.12 only (the oldest supported Python):
      # the ``test-esphome-channels`` job below already runs 3.14 on
      # Linux against beta and dev esphome, a strict superset of
      # "stable esphome on 3.14 / Linux". Windows and macOS run just
      # the newest Python — sufficient to surface OS-specific
      # regressions without paying for extra runs on slower runners.
      include:
        - python-version: "3.12"
          os: ubuntu-latest
        - python-version: "3.14"
          os: windows-latest
        - python-version: "3.14"
          os: macos-latest
  steps:
    - name: Check out code from GitHub
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        ref: ${{ inputs.ref || github.ref }}

    # An Astral-managed Python (PGO + LTO + BOLT + mimalloc, plus the
    # PEP 744 tail-call interpreter on 3.14+) shortens the suite
    # end-to-end. The benchmarks job below stays on stock CPython so
    # CodSpeed's callgrind instruction counts remain comparable with
    # the historical baseline.
    - name: Set up uv and Python ${{ matrix.python-version }}
      uses: ./.github/actions/setup-uv-python
      with:
        python-version: ${{ matrix.python-version }}

    # A managed Python is not a system interpreter, so the package goes
    # into a uv-discovered ``.venv`` and pytest is launched via
    # ``uv run``. ``uv venv`` behaves the same on POSIX and Windows
    # runners and avoids per-shell activate scripts.
    - name: Create venv + install package + test deps
      run: |
        uv venv
        uv pip install -e '.[esphome,test]'

    # ``uv run`` picks up the .venv created above. The command must
    # reach the shell as ONE line — Windows runners default to
    # PowerShell, which rejects bash-style ``\`` continuations — so the
    # folded scalar below is joined into a single line by YAML.
    #
    # Flags:
    #   * ``-n auto``: pytest-xdist, one worker per logical CPU;
    #     pytest-cov merges the per-worker ``.coverage`` files at the
    #     end so the xml report still covers the whole suite.
    #   * ``--maxfail=5``: keeps CI snappy when something fundamental
    #     is broken; ``-q`` keeps the log readable without ``-vv``.
    #   * ``benchmarks/`` is excluded — it is CodSpeed-driven, runs in
    #     a separate job, and its assertions only check chunk counts
    #     (not behaviour).
    #
    # Two layers of hang protection:
    #   * ``--timeout=120`` (pytest-timeout plugin) faults any single
    #     test that wedges for more than 2 minutes and prints the
    #     offending test name + traceback.
    #   * ``timeout-minutes: 5`` is the outer hard cap: if the plugin
    #     cannot recover (deadlocked event loop, stuck subprocess) the
    #     runner kills the step. Healthy runs take ~1-2 minutes;
    #     without the cap one hung worker would burn the runner's full
    #     6h budget.
    - name: Run pytest
      timeout-minutes: 5
      run: >-
        uv run pytest -q -n auto --maxfail=5 --durations=10 --timeout=120
        --ignore=tests/benchmarks --ignore=tests/real_compile --ignore=tests/e2e
        --cov=esphome_device_builder --cov-report=xml --cov-report=term

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354  # v6.0.1
      with:
        files: ./coverage.xml
        flags: py${{ matrix.python-version }}
        fail_ci_if_error: false
test-esphome-channels:
  name: Pytest (esphome ${{ matrix.channel }} / ${{ matrix.os }} / Python ${{ matrix.python-version }})
  runs-on: ${{ matrix.os }}
  # Probes the dashboard against the next two esphome release channels
  # on a single Linux 3.14 runner. Stable is already covered by the
  # ``test`` matrix above (it is what ``pip install -e '.[esphome]'``
  # resolves to), so this job looks only at the forward-looking
  # channels: ``beta`` is a strict gate, catching incompatibilities
  # before they ship to users, while ``dev`` is advisory
  # (allow-failure) because ESPHome's main-branch nightly can break
  # mid-day — we want the signal without a permanent red on every
  # device-builder PR.
  strategy:
    fail-fast: false
    matrix:
      include:
        # ``--prerelease=allow`` opts into pre-release versions (uv's
        # flag — pip's ``--pre`` doesn't apply here). ``--upgrade``
        # makes uv pick the highest one even if a stable is already
        # installed transitively.
        - channel: beta
          os: ubuntu-latest
          python-version: "3.14"
          install: uv pip install --upgrade --prerelease=allow esphome
          allow_failure: false
        # The ``dev`` branch is ESPHome's nightly working copy; it can
        # break at any time and we want the signal but not the gate.
        # Installed from the canonical esphome/esphome repository on
        # github.com rather than a third-party mirror host.
        - channel: dev
          os: ubuntu-latest
          python-version: "3.14"
          install: uv pip install --upgrade git+https://github.com/esphome/esphome.git@dev
          allow_failure: true
  # Job-level ``continue-on-error`` decides whether a failure of this
  # matrix entry fails the whole workflow. ``dev`` opts into
  # advisory-only via the matrix flag; ``beta`` stays strict.
  continue-on-error: ${{ matrix.allow_failure }}
  steps:
    - name: Check out code from GitHub
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Set up uv and Python ${{ matrix.python-version }}
      uses: ./.github/actions/setup-uv-python
      with:
        python-version: ${{ matrix.python-version }}

    - name: Create venv + install package + test deps
      run: |
        uv venv
        uv pip install -e '.[esphome,test]'

    # Runs the channel-specific install command from the matrix entry,
    # after the base install above.
    - name: Install esphome (${{ matrix.channel }} channel)
      run: ${{ matrix.install }}

    # Logs the resolved version so a failure tied to a specific release
    # is greppable in the workflow log without re-running.
    - name: Show installed esphome version
      run: uv pip show esphome | grep -E '^(Name|Version):'

    # ``uv run`` resolves the .venv created above. ``-n auto`` runs
    # under pytest-xdist for the same speedup the OS-axis matrix gets.
    # No ``--cov`` here — the merged coverage report comes from the
    # OS-axis ``test`` job; this run is purely a "does the suite still
    # pass against upstream X" probe. ``--timeout=120`` (per-test) +
    # ``timeout-minutes: 5`` (outer hard cap) match the OS-axis job —
    # hangs against upstream beta/dev are exactly the case this
    # protects against.
    - name: Run pytest
      timeout-minutes: 5
      run: uv run pytest -q -n auto --maxfail=5 --durations=10 --timeout=120 --ignore=tests/benchmarks --ignore=tests/real_compile --ignore=tests/e2e
e2e:
  name: E2E (esphome ${{ matrix.channel }})
  runs-on: ubuntu-latest
  # The remote-build offload e2e suite (``tests/e2e/``) stands up two
  # real controllers over a live peer-link wire. It is split off the
  # unit matrix so it can't slow the unit run or conflate signal (an
  # e2e flake reading as a unit failure) as the suite grows.
  # ``stable`` is a strict gate; ``dev`` is advisory because ESPHome's
  # nightly can break mid-day — we want the signal without a permanent
  # red on every device-builder PR.
  strategy:
    fail-fast: false
    matrix:
      include:
        # No extra install — the ``[esphome]`` extra resolves the
        # stable release the dashboard ships with. The empty string
        # makes the conditional install step below skip itself.
        - channel: stable
          install: ""
          allow_failure: false
        # Nightly working copy, installed from the canonical
        # esphome/esphome repository on github.com rather than a
        # third-party mirror host.
        - channel: dev
          install: uv pip install --upgrade git+https://github.com/esphome/esphome.git@dev
          allow_failure: true
  # Only matrix entries flagged ``allow_failure`` are advisory.
  continue-on-error: ${{ matrix.allow_failure }}
  steps:
    - name: Check out code from GitHub
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Set up uv and Python 3.14
      uses: ./.github/actions/setup-uv-python
      with:
        python-version: "3.14"

    - name: Create venv + install package + test deps
      run: |
        uv venv
        uv pip install -e '.[esphome,test]'

    # Skipped for entries whose ``install`` command is empty (stable).
    - name: Install esphome (${{ matrix.channel }} channel)
      if: matrix.install != ''
      run: ${{ matrix.install }}

    - name: Show installed esphome version
      run: uv pip show esphome | grep -E '^(Name|Version):'

    # The LibreTiny e2e (test_libretiny_compile_download.py) runs a
    # real bk7231n compile, which clones the LibreTiny SDK from source
    # on a cold runner. ``~/.platformio`` is cached per channel so
    # stable/dev SDK downloads don't collide. The native-IDF e2e runs
    # in its own job below (different toolchain tree), so it isn't
    # cached here.
    - name: Cache PlatformIO core + per-platform toolchains
      uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
      with:
        path: ~/.platformio
        key: pio-e2e-libretiny-${{ runner.os }}-${{ matrix.channel }}-${{ hashFiles('tests/e2e/test_libretiny_compile_download.py') }}
        restore-keys: |
          pio-e2e-libretiny-${{ runner.os }}-${{ matrix.channel }}-

    # 20 min outer cap: a cold LibreTiny SDK clone plus compile runs
    # minutes. Fast tests keep ``--timeout=120``; the LibreTiny test
    # opts out via its own ``@pytest.mark.timeout(600)``. The
    # native-IDF e2e is excluded here and runs in the dev-only job
    # below.
    - name: Run e2e tests
      timeout-minutes: 20
      run: >-
        uv run pytest -q -n auto tests/e2e --maxfail=5 --durations=10 --timeout=120 --no-cov
        --ignore=tests/e2e/test_esp_idf_compile_download.py
e2e-native-idf:
  name: E2E native ESP-IDF (esphome dev)
  runs-on: ubuntu-latest
  # The native ESP-IDF toolchain (esphome 2026.5.0+) is new and its
  # cold compile installs the full IDF toolchain (minutes). It runs on
  # its own runner, dev channel only and advisory
  # (``continue-on-error``): it must neither slow the main e2e matrix
  # nor gate a PR on an esphome nightly break. Fold it into the stable
  # matrix once the toolchain has settled.
  continue-on-error: true
  steps:
    - name: Check out code from GitHub
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Set up uv and Python 3.14
      uses: ./.github/actions/setup-uv-python
      with:
        python-version: "3.14"

    - name: Create venv + install package + test deps
      run: |
        uv venv
        uv pip install -e '.[esphome,test]'

    # Installed from the canonical esphome/esphome repository on
    # github.com rather than a third-party mirror host.
    - name: Install esphome (dev channel)
      run: uv pip install --upgrade git+https://github.com/esphome/esphome.git@dev

    - name: Show installed esphome version
      run: uv pip show esphome | grep -E '^(Name|Version):'

    # Native IDF does NOT use PlatformIO; its toolchain installs under
    # ``ESPHOME_ESP_IDF_PREFIX`` (set on the run step). Without the
    # cache a cold IDF install + toolchain download runs on every PR.
    # The installed IDF version is pinned by ``esphome@dev``
    # (reinstalled each run), not by the test file — hence a rolling
    # ``run_id`` key that saves a fresh cache every run (picking up dev
    # IDF bumps), with ``restore-keys`` warming from the most recent
    # prior cache.
    - name: Cache ESP-IDF toolchain
      uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
      with:
        path: ~/.cache/esphome-idf
        key: e2e-espidf-${{ runner.os }}-${{ github.run_id }}
        restore-keys: |
          e2e-espidf-${{ runner.os }}-

    # A cold native-IDF compile installs the IDF toolchain then runs
    # ninja; the test's own ``@pytest.mark.timeout(900)`` caps it.
    # esphome ``expanduser()``s ESPHOME_ESP_IDF_PREFIX, so ``~``
    # matches the cache path above.
    - name: Run native ESP-IDF e2e
      timeout-minutes: 25
      env:
        ESPHOME_ESP_IDF_PREFIX: ~/.cache/esphome-idf
      run: uv run pytest -q tests/e2e/test_esp_idf_compile_download.py --durations=10 --timeout=900 --no-cov
benchmarks:
  name: Run benchmarks (CodSpeed)
  runs-on: ubuntu-latest
  # Benchmarks only run on PRs to ``main`` and pushes to ``main`` —
  # ``workflow_call`` runs (release preflight) skip them since
  # CodSpeed's instrumentation harness adds non-trivial wallclock to
  # the matrix and the comparison only makes sense against the
  # historical baseline CodSpeed already has.
  #
  # The gate cannot test ``github.event_name``: inside a called
  # workflow the ``github`` context belongs to the CALLER, so the event
  # name is never ``workflow_call`` and such a check never skips.
  # ``inputs.ref`` is the reliable signal — it is empty on direct
  # push / pull_request triggers and carries a value (default ``main``)
  # whenever this file is invoked through ``workflow_call``. This is
  # the same distinction the ``inputs.ref || github.ref`` checkout
  # expressions in the other jobs rely on.
  if: ${{ !inputs.ref }}
  steps:
    - name: Check out code from GitHub
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    - name: Set up Python
      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
      with:
        python-version: "3.13"

    - name: Set up uv
      uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b  # v8.1.0
      with:
        enable-cache: true

    - name: Install package + test deps
      run: uv pip install --system -e '.[esphome,test]'

    # ``simulation`` is the new name for what used to be called
    # ``instrumentation`` — same callgrind-based runner under the hood,
    # just renamed. The action prints a deprecation warning when you
    # ask for ``instrumentation`` explicitly.
    #
    # ``--timeout=600`` overrides the 10s per-test default set in
    # ``pyproject.toml``. CodSpeed's callgrind instrumentation
    # multiplies each benchmark's wallclock by 10-50x, so a
    # microbenchmark that runs in 50ms outside the harness can
    # legitimately take a few minutes here. 10 minutes covers the
    # worst case (the catalog-load benchmark, which exercises every
    # BoardCatalogEntry's nested dataclass deserialisation) with
    # headroom over the observed 3-5 minute floor.
    #
    # ``--durations 10`` prints the 10 slowest benchmark wall times at
    # the end of the run so a regression in any single bench is visible
    # in the CI log without having to dig into the CodSpeed dashboard.
    - name: Run benchmarks
      uses: CodSpeedHQ/action@9d332c4d90b43981c3e55ae8e38e68709996240f  # v4.17.0
      with:
        mode: simulation
        run: pytest tests/benchmarks --codspeed --no-cov --timeout=600 --durations 10