Skip to content

Commit 537fb3e

Browse files
authored
Merge branch 'main' into gh/gasoonjia/158/base
2 parents 722c19d + c27cc5d commit 537fb3e

14 files changed

Lines changed: 1353 additions & 1346 deletions

File tree

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import torch
7+
from executorch.backends.arm.test.common import parametrize
8+
from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
9+
from executorch.examples.models.silero_vad.export_silero_vad import (
10+
CONTEXT_SIZE,
11+
HIDDEN_DIM,
12+
SileroVAD16k,
13+
WINDOW_SIZE,
14+
)
15+
16+
17+
ops_before_transforms: dict[str, int] = {
18+
"executorch_exir_dialects_edge__ops_aten_abs_default": 2,
19+
"executorch_exir_dialects_edge__ops_aten_add_Tensor": 3,
20+
"executorch_exir_dialects_edge__ops_aten_arange_start_step": 1,
21+
"executorch_exir_dialects_edge__ops_aten_cat_default": 1,
22+
"executorch_exir_dialects_edge__ops_aten_convolution_default": 6,
23+
"executorch_exir_dialects_edge__ops_aten_index_Tensor": 1,
24+
"executorch_exir_dialects_edge__ops_aten_linear_default": 2,
25+
"executorch_exir_dialects_edge__ops_aten_mean_dim": 1,
26+
"executorch_exir_dialects_edge__ops_aten_mul_Tensor": 3,
27+
"executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
28+
"executorch_exir_dialects_edge__ops_aten_relu_default": 5,
29+
"executorch_exir_dialects_edge__ops_aten_select_copy_int": 2,
30+
"executorch_exir_dialects_edge__ops_aten_sigmoid_default": 4,
31+
"executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor": 2,
32+
"executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default": 1,
33+
"executorch_exir_dialects_edge__ops_aten_sqrt_default": 1,
34+
"executorch_exir_dialects_edge__ops_aten_squeeze_copy_dims": 2,
35+
"executorch_exir_dialects_edge__ops_aten_sub_Tensor": 2,
36+
"executorch_exir_dialects_edge__ops_aten_tanh_default": 2,
37+
"executorch_exir_dialects_edge__ops_aten_unsqueeze_copy_default": 2,
38+
"executorch_exir_dialects_edge__ops_aten_view_copy_default": 1,
39+
"executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 12,
40+
"executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 11,
41+
}
42+
ops_after_transforms: dict[str, int] = {
43+
"executorch_exir_dialects_edge__ops_aten_abs_default": 2,
44+
"executorch_exir_dialects_edge__ops_aten_add_Tensor": 2,
45+
"executorch_exir_dialects_edge__ops_aten_arange_start_step": 1,
46+
"executorch_exir_dialects_edge__ops_aten_cat_default": 1,
47+
"executorch_exir_dialects_edge__ops_aten_convolution_default": 6,
48+
"executorch_exir_dialects_edge__ops_aten_index_Tensor": 1,
49+
"executorch_exir_dialects_edge__ops_aten_linear_default": 2,
50+
"executorch_exir_dialects_edge__ops_aten_mean_dim": 1,
51+
"executorch_exir_dialects_edge__ops_aten_mul_Tensor": 3,
52+
"executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
53+
"executorch_exir_dialects_edge__ops_aten_relu_default": 5,
54+
"executorch_exir_dialects_edge__ops_aten_select_copy_int": 2,
55+
"executorch_exir_dialects_edge__ops_aten_sigmoid_default": 4,
56+
"executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor": 2,
57+
"executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default": 1,
58+
"executorch_exir_dialects_edge__ops_aten_sqrt_default": 1,
59+
"executorch_exir_dialects_edge__ops_aten_squeeze_copy_dims": 2,
60+
"executorch_exir_dialects_edge__ops_aten_sub_Tensor": 2,
61+
"executorch_exir_dialects_edge__ops_aten_tanh_default": 2,
62+
"executorch_exir_dialects_edge__ops_aten_unsqueeze_copy_default": 2,
63+
"executorch_exir_dialects_edge__ops_aten_view_copy_default": 1,
64+
"executorch_exir_dialects_edge__ops_cortex_m_dequantize_per_tensor_default": 6,
65+
"executorch_exir_dialects_edge__ops_cortex_m_quantize_per_tensor_default": 6,
66+
"executorch_exir_dialects_edge__ops_cortex_m_quantized_add_default": 1,
67+
}
68+
69+
70+
pt_model = SileroVAD16k().eval()
71+
72+
x = torch.randn(
73+
1, CONTEXT_SIZE + WINDOW_SIZE
74+
) # (1, 576) — 64 context + 512 audio samples
75+
state = torch.zeros(2, 1, HIDDEN_DIM) # (2, 1, 128) — [h, c] LSTM state
76+
77+
test_cases = {
78+
"silero_vad_16k": McuTestCase(
79+
model=pt_model,
80+
example_inputs=lambda: (x, state),
81+
),
82+
}
83+
84+
85+
@parametrize("test_case", test_cases)
86+
def test_dialect_silero_vad_16k(test_case):
87+
"""This model currently does largely not lower to accelerated kernels due to missing LSTM and conv1d support, this test is to track development progress."""
88+
inputs = test_case.get_example_inputs()
89+
tester = CortexMTester(test_case.model, inputs)
90+
tester.test_dialect(
91+
ops_before_transforms,
92+
ops_after_transforms,
93+
qtol=10,
94+
)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from executorch.backends.arm.test.common import parametrize
7+
from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
8+
from executorch.examples.models.wav2letter.model import Wav2LetterModel
9+
10+
11+
ops_before_transforms: dict[str, int] = {}
12+
ops_after_transforms: dict[str, int] = {}
13+
14+
model = Wav2LetterModel()
15+
pt_model = model.get_eager_model()
16+
17+
test_cases = {
18+
"wav2letter": McuTestCase(
19+
model=pt_model,
20+
example_inputs=lambda: model.get_example_inputs(),
21+
),
22+
}
23+
24+
25+
@parametrize("test_case", test_cases)
26+
def test_dialect_wav2letter(test_case):
27+
"""This model currently does largely not lower to accelerated kernels due to missing conv1d support, this test is to track development progress."""
28+
inputs = test_case.get_example_inputs()
29+
tester = CortexMTester(test_case.model, inputs)
30+
tester.test_dialect(
31+
ops_before_transforms,
32+
ops_after_transforms,
33+
qtol=10,
34+
)
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import pytest
7+
import torch
8+
from executorch.backends.arm.test.common import parametrize
9+
10+
from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
11+
12+
YOLO = pytest.importorskip(
13+
"ultralytics",
14+
reason="ultralytics is optional; install it locally to run YOLO tests.",
15+
).YOLO
16+
17+
18+
ops_before_transforms: dict[str, int] = {}
19+
ops_after_transforms: dict[str, int] = {}
20+
21+
22+
WEIGHTS = "yolo11n.pt"
23+
yolo = YOLO(WEIGHTS)
24+
pt_model = yolo.model.eval()
25+
26+
test_cases = {
27+
"yolo11n": McuTestCase(
28+
model=pt_model,
29+
example_inputs=lambda: (
30+
torch.randn(1, 3, 640, 640).to(memory_format=torch.channels_last),
31+
),
32+
),
33+
}
34+
35+
36+
@parametrize("test_case", test_cases)
37+
def test_dialect_yolo11(test_case):
38+
"""This model currently does not lower in the cortex-m backend, this test is to track development progress."""
39+
inputs = test_case.get_example_inputs()
40+
tester = CortexMTester(test_case.model, inputs)
41+
tester.test_dialect(
42+
ops_before_transforms,
43+
ops_after_transforms,
44+
qtol=10,
45+
)

backends/xnnpack/runtime/XNNPACKBackend.cpp

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <executorch/runtime/core/evalue.h>
1717
#include <executorch/runtime/executor/pte_data_map.h>
1818

19+
#include <cinttypes>
1920
#include <memory>
2021
#include <mutex>
2122

@@ -41,6 +42,13 @@ using executorch::runtime::FreeableBuffer;
4142
using executorch::runtime::Result;
4243
using executorch::runtime::Span;
4344

45+
// Global mutex for all XNNPACK operations. This is temporary, tracked by
46+
// T272407942.
47+
static std::mutex& global_xnnpack_mutex() {
48+
static std::mutex m;
49+
return m;
50+
}
51+
4452
class XnnpackBackend final
4553
: public ::executorch::ET_RUNTIME_NAMESPACE::BackendInterface {
4654
public:
@@ -66,6 +74,8 @@ class XnnpackBackend final
6674
BackendInitContext& context,
6775
FreeableBuffer* processed,
6876
ArrayRef<CompileSpec> compile_specs) const override {
77+
const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
78+
6979
auto executor = context.get_runtime_allocator()
7080
->allocateInstance<xnnpack::delegate::XNNExecutor>();
7181
if (executor == nullptr) {
@@ -129,22 +139,45 @@ class XnnpackBackend final
129139
Error, "XNNCompiler::compileModel failed: 0x%x", (unsigned int)err);
130140
return err;
131141
}
142+
143+
ET_LOG(
144+
Info,
145+
"XnnpackBackend::init delegate=%p workspace_id=%" PRIu64
146+
" workspace_ptr=%p program_id=0x%" PRIxPTR " weight_cache=%s",
147+
(void*)executor,
148+
workspace->id(),
149+
(void*)workspace_ptr,
150+
program_id,
151+
use_weight_cache ? "true" : "false");
152+
132153
return executor;
133154
}
134155

135156
Error execute(
136157
BackendExecutionContext& context,
137158
DelegateHandle* handle,
138159
Span<EValue*> args) const override {
160+
const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
161+
139162
auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
140163

164+
auto workspace = executor->get_workspace();
165+
ET_LOG(
166+
Info,
167+
"XnnpackBackend::execute begin delegate=%p workspace_id=%" PRIu64
168+
" num_args=%zu weight_cache=%s",
169+
(void*)executor,
170+
workspace->id(),
171+
(size_t)args.size(),
172+
executor->uses_weight_cache() ? "true" : "false");
173+
141174
std::unique_lock<std::mutex> lock_weights_cache(
142175
weights_cache_mutex_, std::defer_lock);
143176
if (executor->uses_weight_cache()) {
144177
lock_weights_cache.lock();
145178
}
146179

147-
auto [raii_lock, _] = executor->get_workspace()->acquire();
180+
auto [raii_lock, _] = workspace->acquire();
148181

149182
// Prepare Inputs/Outputs and Propagate Input Shapes
150183
Error err = executor->prepare_args(args);
@@ -161,12 +194,29 @@ class XnnpackBackend final
161194
// Convert output data types if necessary (e.g., int32 -> int64 for Long)
162195
err = executor->convert_outputs(args);
163196

197+
ET_LOG(
198+
Info,
199+
"XnnpackBackend::execute end delegate=%p workspace_id=%" PRIu64
200+
" err=0x%x",
201+
(void*)executor,
202+
workspace->id(),
203+
(unsigned int)err);
204+
164205
return err;
165206
}
166207

167208
void destroy(DelegateHandle* handle) const override {
168209
if (handle != nullptr) {
210+
const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
211+
169212
auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
213+
auto workspace = executor->get_workspace();
214+
215+
ET_LOG(
216+
Info,
217+
"XnnpackBackend::destroy delegate=%p workspace_id=%" PRIu64,
218+
(void*)executor,
219+
workspace->id());
170220

171221
#ifdef ENABLE_XNNPACK_PROFILING
172222
executor->print_avg_op_timings();
@@ -183,7 +233,6 @@ class XnnpackBackend final
183233
// the same backend instance. Make sure to hold onto the workspace
184234
// shared_ptr, as the pointer in the executor is freed, which includes
185235
// the mutex referenced by raii_lock.
186-
auto workspace = executor->get_workspace();
187236
auto [raii_lock, _] = workspace->acquire();
188237

189238
// XNNExecutor is not trivially destructible. Since this was constructed

extension/android/BUCK

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,14 @@ non_fbcode_target(_kind = fb_android_library,
4747
name = "executorch_llama",
4848
warnings_as_errors = False,
4949
srcs = [
50-
"executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java",
51-
"executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.java",
52-
"executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java",
53-
"executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.java",
50+
"executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt",
51+
"executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.kt",
52+
"executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.kt",
53+
"executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.kt",
5454
],
5555
autoglob = False,
56-
language = "JAVA",
56+
language = "KOTLIN",
57+
extra_kotlinc_arguments = ["-Xjvm-default=all"],
5758
deps = [
5859
":executorch",
5960
"//fbandroid/java/com/facebook/jni:jni",

extension/android/executorch_android/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ android {
5151
}
5252
kotlinOptions {
5353
jvmTarget = "11"
54+
freeCompilerArgs += ["-Xjvm-default=all"]
5455
}
5556
}
5657

extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java renamed to extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,45 +6,42 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
package org.pytorch.executorch.extension.llm;
9+
package org.pytorch.executorch.extension.llm
1010

11-
import com.facebook.jni.annotations.DoNotStrip;
12-
import org.pytorch.executorch.annotations.Experimental;
11+
import com.facebook.jni.annotations.DoNotStrip
12+
import org.pytorch.executorch.annotations.Experimental
1313

1414
/**
15-
* Callback interface for Llama model. Users can implement this interface to receive the generated
15+
* Callback interface for Llm model. Users can implement this interface to receive the generated
1616
* tokens and statistics.
1717
*
18-
* <p>Warning: These APIs are experimental and subject to change without notice
18+
* Warning: These APIs are experimental and subject to change without notice.
1919
*/
2020
@Experimental
21-
public interface LlmCallback {
21+
interface LlmCallback {
2222
/**
2323
* Called when a new result is available from JNI. Users will keep getting onResult() invocations
2424
* until generate() finishes.
2525
*
2626
* @param result Last generated token
2727
*/
28-
@DoNotStrip
29-
public void onResult(String result);
28+
@DoNotStrip fun onResult(result: String)
3029

3130
/**
3231
* Called when the statistics for generate() are available.
3332
*
34-
* <p>The result will be a JSON string. See extension/llm/stats.h for the field definitions.
33+
* The result will be a JSON string. See extension/llm/stats.h for the field definitions.
3534
*
3635
* @param stats JSON string containing the statistics for generate()
3736
*/
38-
@DoNotStrip
39-
default void onStats(String stats) {}
37+
@DoNotStrip fun onStats(stats: String) {}
4038

4139
/**
4240
* Called when an error occurs during generate().
4341
*
44-
* @param errorCode Error code from the ExecuTorch runtime (see {@link
45-
* org.pytorch.executorch.ExecutorchRuntimeException})
42+
* @param errorCode Error code from the ExecuTorch runtime (see
43+
* [org.pytorch.executorch.ExecutorchRuntimeException])
4644
* @param message Human-readable error description
4745
*/
48-
@DoNotStrip
49-
default void onError(int errorCode, String message) {}
46+
@DoNotStrip fun onError(errorCode: Int, message: String) {}
5047
}

0 commit comments

Comments
 (0)