Skip to content

Commit 6258320

Browse files
authored
Merge branch 'main' into feature/standalone-runner
2 parents 1a65a1e + 3b5d18d commit 6258320

22 files changed

Lines changed: 300 additions & 172 deletions

File tree

.ci/docker/build.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,6 @@ esac
9797
TORCH_VERSION=$(cat ci_commit_pins/pytorch.txt)
9898
BUILD_DOCS=1
9999

100-
if [[ "${GCC_VERSION:-}" == "11" && -z "${SKIP_PYTORCH:-}" ]]; then
101-
PYTORCH_BUILD_MAX_JOBS=6
102-
fi
103-
104100
# Copy requirements-lintrunner.txt from root to here
105101
cp ../../requirements-lintrunner.txt ./
106102

@@ -113,7 +109,6 @@ docker build \
113109
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
114110
--build-arg "MINICONDA_VERSION=${MINICONDA_VERSION}" \
115111
--build-arg "TORCH_VERSION=${TORCH_VERSION}" \
116-
--build-arg "PYTORCH_BUILD_MAX_JOBS=${PYTORCH_BUILD_MAX_JOBS:-}" \
117112
--build-arg "BUCK2_VERSION=${BUCK2_VERSION}" \
118113
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
119114
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
release/2.12
1+
release/2.11

.ci/docker/common/install_cache.sh

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,6 @@ init_sccache() {
7676
# This is the remote cache bucket
7777
export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
7878
export SCCACHE_S3_KEY_PREFIX=executorch
79-
export SCCACHE_REGION=us-east-1
80-
export AWS_REGION=us-east-1
81-
export AWS_DEFAULT_REGION=us-east-1
8279
export SCCACHE_IDLE_TIMEOUT=0
8380
export SCCACHE_ERROR_LOG=/tmp/sccache_error.log
8481
export RUST_LOG=sccache::server=error

.ci/docker/common/install_pytorch.sh

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,14 @@ install_pytorch_and_domains() {
2727
chown -R ci-user .
2828

2929
export _GLIBCXX_USE_CXX11_ABI=1
30-
if [[ "$(uname -m)" == "aarch64" ]]; then
31-
export BUILD_IGNORE_SVE_UNAVAILABLE=1
32-
fi
33-
if [[ -n "${PYTORCH_BUILD_MAX_JOBS:-}" ]]; then
34-
export MAX_JOBS="${PYTORCH_BUILD_MAX_JOBS}"
35-
fi
3630
# Then build and install PyTorch
3731
conda_run python setup.py bdist_wheel
3832
pip_install "$(echo dist/*.whl)"
3933

4034
# Grab the pinned audio and vision commits from PyTorch
4135
TORCHAUDIO_VERSION=release/2.11
4236
export TORCHAUDIO_VERSION
43-
TORCHVISION_VERSION=release/0.27
37+
TORCHVISION_VERSION=release/0.26
4438
export TORCHVISION_VERSION
4539

4640
install_domains

.ci/docker/ubuntu/Dockerfile

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,9 @@ RUN bash ./install_cache.sh && rm install_cache.sh utils.sh
6262
ENV SCCACHE_BUCKET ossci-compiler-cache-circleci-v2
6363
ENV SCCACHE_S3_KEY_PREFIX executorch
6464
ENV SCCACHE_REGION us-east-1
65-
ENV AWS_REGION us-east-1
66-
ENV AWS_DEFAULT_REGION us-east-1
6765

6866
ARG TORCH_VERSION
6967
ARG SKIP_PYTORCH
70-
ARG PYTORCH_BUILD_MAX_JOBS
7168
COPY ./common/install_pytorch.sh install_pytorch.sh
7269
COPY ./common/utils.sh utils.sh
7370
RUN if [ -z "${SKIP_PYTORCH}" ]; then bash ./install_pytorch.sh; fi && rm install_pytorch.sh utils.sh

.ci/scripts/utils.sh

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ install_pytorch_and_domains() {
107107
local torch_release=$(cat version.txt)
108108
# Download key must match the upload key below (basename of dist/*.whl,
109109
# which always carries setup.py's resolved +gitHASH). Branch-ref pins
110-
# like `release/2.12` would otherwise produce `+gitrelease` here and
110+
# like `release/2.11` would otherwise produce `+gitrelease` here and
111111
# never hit the cache.
112112
local torch_short_hash=$(git rev-parse --short=7 HEAD)
113113
local torch_wheel_path="cached_artifacts/pytorch/executorch/pytorch_wheels/${system_name}/${python_version}"
@@ -132,9 +132,6 @@ install_pytorch_and_domains() {
132132
# (e.g. executorch's requirements-ci.txt).
133133
pip install -r requirements-build.txt
134134
git submodule update --init --recursive
135-
if [[ "$(uname -m)" == "aarch64" ]]; then
136-
export BUILD_IGNORE_SVE_UNAVAILABLE=1
137-
fi
138135
USE_DISTRIBUTED=1 python setup.py bdist_wheel
139136
pip install "$(echo dist/*.whl)"
140137

@@ -178,7 +175,7 @@ install_pytorch_and_domains() {
178175
# Grab the pinned audio and vision commits from PyTorch
179176
TORCHAUDIO_VERSION=release/2.11
180177
export TORCHAUDIO_VERSION
181-
TORCHVISION_VERSION=release/0.27
178+
TORCHVISION_VERSION=release/0.26
182179
export TORCHVISION_VERSION
183180

184181
install_domains

.github/workflows/mlx.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,10 @@ jobs:
120120
--prompt-len 4 \
121121
--max-new-tokens 5 2>&1)
122122
echo "$OUTPUT"
123-
if echo "$OUTPUT" | grep -q "Generated token ids: \[167, 94, 253, 88, 227\]"; then
123+
if echo "$OUTPUT" | grep -q "Generated token ids: \[167, 167, 81, 167, 81\]"; then
124124
echo "Success: Qwen 3.5 MoE MLX export + inference completed with expected output"
125125
else
126-
echo "Failed: unexpected output (expected [167, 94, 253, 88, 227])"
126+
echo "Failed: unexpected output (expected [167, 167, 81, 167, 81])"
127127
exit 1
128128
fi
129129
echo "::endgroup::"

backends/arm/README.md

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,33 @@ Developers who need local source builds can use:
116116
The current flow lowers to TOSA and converts to VGF for use in external projects,
117117
so the `executor_runner` is not typically used here.
118118

119+
### Compiling models with the Python API
120+
121+
Use the Python API as the primary way to compile your own models. It lets you
122+
keep model construction, export inputs, quantization, custom passes, and artifact
123+
generation in your application code. The `aot_arm_compiler.py` script is useful
124+
for simple examples and smoke tests, but production code should call the
125+
ExecuTorch and Arm backend APIs directly.
126+
127+
The delegated Python API flow is:
128+
129+
1. Prepare the model and representative example inputs.
130+
2. Create a target-specific Arm compile spec.
131+
3. Export the model with `torch.export.export`.
132+
4. Optionally quantize with the target-specific Arm quantizer and re-export the
133+
quantized graph.
134+
5. Create the matching Arm partitioner from the compile spec.
135+
6. Lower with `to_edge_transform_and_lower`.
136+
7. Convert to an ExecuTorch program and save the PTE file.
137+
138+
For complete examples of that flow, including quantization and target-specific
139+
compile specs, see:
140+
141+
- `docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md`
142+
- `docs/source/backends/arm-vgf/tutorials/vgf-getting-started.md`
143+
144+
Additional examples are available in `examples/arm`.
145+
119146
### Direct Drive (experimental, Ethos-U85 on Linux) workflow
120147

121148
Direct Drive enables execution on Ethos-U85 via the Linux driver stack.
@@ -159,7 +186,8 @@ scp -P 2222 arm_test/cmake-out/executor_runner root@127.0.0.1:/tmp/
159186

160187
#### Direct Drive model (PTE) workflow
161188

162-
Create a PTE file:
189+
For a quick test with the example `add` model,
190+
`aot_arm_compiler.py` can be used:
163191

164192
```
165193
python3 -m backends.arm.scripts.aot_arm_compiler \
@@ -170,16 +198,30 @@ python3 -m backends.arm.scripts.aot_arm_compiler \
170198
--direct_drive
171199
```
172200

201+
For production use, the Python API described in
202+
[Compiling models with the Python API](#compiling-models-with-the-python-api)
203+
should be used. Use an Ethos-U85 target and set the Direct Drive `extra_flags` when creating the `EthosUCompileSpec`:
204+
205+
```python
206+
compile_spec = EthosUCompileSpec(
207+
target="ethos-u85-256",
208+
extra_flags=["--separate-io-regions", "--cop-format=COP2"],
209+
)
210+
```
211+
212+
Then save the generated program as, for example, `model.pte`. If you use a
213+
different file name, update the copy and run commands below to match it.
214+
173215
Copy the `executor_runner` binary and the generated PTE file to the running FVP:
174216

175217
```
176-
scp -P 2222 arm_test/cmake-out/executor_runner add_arm_delegate_ethos-u85-256.pte root@127.0.0.1:/tmp/
218+
scp -P 2222 arm_test/cmake-out/executor_runner model.pte root@127.0.0.1:/tmp/
177219
```
178220

179221
Run the model on the FVP:
180222

181223
```
182-
ssh -p 2222 root@127.0.0.1 -t "/tmp/executor_runner -model_path /tmp/add_arm_delegate_ethos-u85-256.pte -num_executions 1"
224+
ssh -p 2222 root@127.0.0.1 -t "/tmp/executor_runner -model_path /tmp/model.pte -num_executions 1"
183225
```
184226

185227
## Testing

backends/arm/_passes/arm_pass.py

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@
99
from abc import abstractmethod
1010
from typing import Any, List, Optional, Set, Type
1111

12-
import torch
1312
from executorch.backends.arm.constants import DISALLOW_TFA_META_KEY
1413
from executorch.backends.arm.tosa.mapping import TosaSpecialDtype
1514
from executorch.exir.dialects._ops import ops as exir_ops
1615
from executorch.exir.pass_base import ExportPass, NodeMetadata, ProxyValue
1716
from torch.fx import GraphModule
1817
from torch.fx.passes.infra.pass_base import PassResult
19-
from torch.utils import _pytree as pytree
2018

2119

2220
class ArmPass(ExportPass):
@@ -81,13 +79,6 @@ def get_name(pass_) -> str:
8179
)
8280

8381
def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False):
84-
if (
85-
op == exir_ops.edge.aten.bmm.default
86-
and isinstance(meta, NodeMetadata)
87-
and len(meta.data.get("input_qparams", {})) > 0
88-
):
89-
return self._call_quantized_bmm_without_fake_kernel(op, args, kwargs, meta)
90-
9182
if not updated:
9283
return super().call_operator(op, args, kwargs, meta)
9384

@@ -100,35 +91,6 @@ def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False)
10091
new_meta["stack_trace"] = f"{old_stack_trace}\n{traceback.format_stack()[-2]}"
10192
return super().call_operator(op, args, kwargs, NodeMetadata(new_meta))
10293

103-
def _call_quantized_bmm_without_fake_kernel(
104-
self,
105-
op,
106-
args: tuple[ProxyValue, ...],
107-
kwargs: dict[str, Any],
108-
meta: NodeMetadata,
109-
) -> ProxyValue:
110-
old_val = meta.data["val"]
111-
output_qparams = meta.data.get("output_qparams", {})
112-
dtype = (
113-
next(iter(output_qparams.values())).dtype
114-
if len(output_qparams) > 0
115-
else old_val.dtype
116-
)
117-
res_data = torch.empty_like(old_val, dtype=dtype)
118-
119-
args_proxy, kwargs_proxy = pytree.tree_map_only(
120-
ProxyValue, lambda x: x.proxy, (args, kwargs)
121-
)
122-
res_proxy = self.tracer.create_proxy(
123-
"call_function",
124-
op,
125-
args_proxy,
126-
kwargs_proxy,
127-
)
128-
res_proxy.node.meta.update(meta.data)
129-
self.tracer.set_metadata(res_proxy.node, res_data)
130-
return ProxyValue(res_data, res_proxy)
131-
13294
def call_submodule(
13395
self, graph_module: GraphModule, inputs: tuple[Any, ...]
13496
) -> PassResult:

backends/nxp/tests/generic_tests/test_per_channel_conversion.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -169,19 +169,14 @@ def test_per_channel_convolution(self, _, use_qat: bool):
169169
atol=1.0,
170170
)
171171

172-
conv_nodes = [
173-
node
174-
for node in exported_program.graph.nodes
175-
if node.target == exir_ops.edge.aten.convolution.default
176-
]
177-
assert len(conv_nodes) == 1
178-
179-
conv_node = conv_nodes[0]
172+
nodes = list(exported_program.graph.nodes)
173+
180174
assert (
181-
conv_node.args[1].target
175+
nodes[8].target
182176
== exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
183177
)
184178
assert (
185-
conv_node.args[2].target
179+
nodes[9].target
186180
== exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
187181
)
182+
assert nodes[10].target == exir_ops.edge.aten.convolution.default

0 commit comments

Comments
 (0)