pytorch
diff --git a/‎backends/cortex_m/test/models/test_silero_vad.py‎
Lines changed: 94 additions & 0 deletions b/‎backends/cortex_m/test/models/test_silero_vad.py‎
Lines changed: 94 additions & 0 deletions
diff --git a/‎backends/cortex_m/test/models/test_wav2letter.py‎
Lines changed: 34 additions & 0 deletions b/‎backends/cortex_m/test/models/test_wav2letter.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎backends/cortex_m/test/models/test_yolo11.py‎
Lines changed: 45 additions & 0 deletions b/‎backends/cortex_m/test/models/test_yolo11.py‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎backends/xnnpack/runtime/XNNPACKBackend.cpp‎
Lines changed: 51 additions & 2 deletions b/‎backends/xnnpack/runtime/XNNPACKBackend.cpp‎
Lines changed: 51 additions & 2 deletions
diff --git a/‎extension/android/BUCK‎
Lines changed: 6 additions & 5 deletions b/‎extension/android/BUCK‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎extension/android/executorch_android/build.gradle‎
Lines changed: 1 addition & 0 deletions b/‎extension/android/executorch_android/build.gradle‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎…xecutorch/extension/llm/LlmCallback.java‎ ‎…/executorch/extension/llm/LlmCallback.kt‎extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java renamed to extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt
Lines changed: 12 additions & 15 deletions b/‎…xecutorch/extension/llm/LlmCallback.java‎ ‎…/executorch/extension/llm/LlmCallback.kt‎extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java renamed to extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt
Lines changed: 12 additions & 15 deletions
@@ -0,0 +1,94 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm.test.common import parametrize
+from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
+from executorch.examples.models.silero_vad.export_silero_vad import (
+    CONTEXT_SIZE,
+    HIDDEN_DIM,
+    SileroVAD16k,
+    WINDOW_SIZE,
+)
+
+
+ops_before_transforms: dict[str, int] = {  # edge-dialect op name -> count; first argument to test_dialect
+    "executorch_exir_dialects_edge__ops_aten_abs_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_add_Tensor": 3,
+    "executorch_exir_dialects_edge__ops_aten_arange_start_step": 1,
+    "executorch_exir_dialects_edge__ops_aten_cat_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_convolution_default": 6,
+    "executorch_exir_dialects_edge__ops_aten_index_Tensor": 1,
+    "executorch_exir_dialects_edge__ops_aten_linear_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_mean_dim": 1,
+    "executorch_exir_dialects_edge__ops_aten_mul_Tensor": 3,
+    "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
+    "executorch_exir_dialects_edge__ops_aten_relu_default": 5,
+    "executorch_exir_dialects_edge__ops_aten_select_copy_int": 2,
+    "executorch_exir_dialects_edge__ops_aten_sigmoid_default": 4,
+    "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_sqrt_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_squeeze_copy_dims": 2,
+    "executorch_exir_dialects_edge__ops_aten_sub_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_tanh_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_unsqueeze_copy_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_view_copy_default": 1,
+    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 12,
+    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 11,
+}
+ops_after_transforms: dict[str, int] = {  # edge-dialect op name -> count; second argument to test_dialect
+    "executorch_exir_dialects_edge__ops_aten_abs_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_add_Tensor": 2,  # 3 in ops_before_transforms
+    "executorch_exir_dialects_edge__ops_aten_arange_start_step": 1,
+    "executorch_exir_dialects_edge__ops_aten_cat_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_convolution_default": 6,
+    "executorch_exir_dialects_edge__ops_aten_index_Tensor": 1,
+    "executorch_exir_dialects_edge__ops_aten_linear_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_mean_dim": 1,
+    "executorch_exir_dialects_edge__ops_aten_mul_Tensor": 3,
+    "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
+    "executorch_exir_dialects_edge__ops_aten_relu_default": 5,
+    "executorch_exir_dialects_edge__ops_aten_select_copy_int": 2,
+    "executorch_exir_dialects_edge__ops_aten_sigmoid_default": 4,
+    "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_sqrt_default": 1,
+    "executorch_exir_dialects_edge__ops_aten_squeeze_copy_dims": 2,
+    "executorch_exir_dialects_edge__ops_aten_sub_Tensor": 2,
+    "executorch_exir_dialects_edge__ops_aten_tanh_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_unsqueeze_copy_default": 2,
+    "executorch_exir_dialects_edge__ops_aten_view_copy_default": 1,
+    "executorch_exir_dialects_edge__ops_cortex_m_dequantize_per_tensor_default": 6,
+    "executorch_exir_dialects_edge__ops_cortex_m_quantize_per_tensor_default": 6,
+    "executorch_exir_dialects_edge__ops_cortex_m_quantized_add_default": 1,  # not present in ops_before_transforms
+}
+
+
+pt_model = SileroVAD16k().eval()  # built once at import time, switched to eval mode
+
+x = torch.randn(  # NOTE(review): no seed is set in this file
+    1, CONTEXT_SIZE + WINDOW_SIZE
+)  # (1, 576) — 64 context + 512 audio samples
+state = torch.zeros(2, 1, HIDDEN_DIM)  # (2, 1, 128) — [h, c] LSTM state
+
+test_cases = {
+    "silero_vad_16k": McuTestCase(
+        model=pt_model,
+        example_inputs=lambda: (x, state),  # same module-level tensors on every call
+    ),
+}
+
+
+@parametrize("test_case", test_cases)
+def test_dialect_silero_vad_16k(test_case):
+    """Track lowering progress: most of this model is not accelerated yet (missing LSTM and conv1d support)."""
+    example_inputs = test_case.get_example_inputs()
+    # Expectations come from the two module-level op dicts.
+    CortexMTester(test_case.model, example_inputs).test_dialect(
+        ops_before_transforms,
+        ops_after_transforms,
+        qtol=10,
+    )
@@ -0,0 +1,34 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.backends.arm.test.common import parametrize
+from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
+from executorch.examples.models.wav2letter.model import Wav2LetterModel
+
+
+ops_before_transforms: dict[str, int] = {}  # NOTE(review): empty — no per-op counts are listed yet
+ops_after_transforms: dict[str, int] = {}  # NOTE(review): empty — no per-op counts are listed yet
+
+model = Wav2LetterModel()  # wrapper object; supplies both the eager model and its inputs
+pt_model = model.get_eager_model()
+
+test_cases = {
+    "wav2letter": McuTestCase(
+        model=pt_model,
+        example_inputs=lambda: model.get_example_inputs(),  # evaluated only when the lambda is called
+    ),
+}
+
+
+@parametrize("test_case", test_cases)
+def test_dialect_wav2letter(test_case):
+    """Track lowering progress: most of this model is not accelerated yet (missing conv1d support)."""
+    example_inputs = test_case.get_example_inputs()
+    # Expectations come from the two module-level op dicts.
+    CortexMTester(test_case.model, example_inputs).test_dialect(
+        ops_before_transforms,
+        ops_after_transforms,
+        qtol=10,
+    )
@@ -0,0 +1,45 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import pytest
+import torch
+from executorch.backends.arm.test.common import parametrize
+
+from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
+
+YOLO = pytest.importorskip(
+    "ultralytics",
+    reason="ultralytics is optional; install it locally to run YOLO tests.",
+).YOLO  # importorskip skips this whole module when ultralytics cannot be imported
+
+
+ops_before_transforms: dict[str, int] = {}  # NOTE(review): empty — no per-op counts are listed yet
+ops_after_transforms: dict[str, int] = {}  # NOTE(review): empty — no per-op counts are listed yet
+
+
+WEIGHTS = "yolo11n.pt"
+yolo = YOLO(WEIGHTS)  # NOTE(review): runs at import time; presumably needs or fetches the weights file — confirm
+pt_model = yolo.model.eval()
+
+test_cases = {
+    "yolo11n": McuTestCase(
+        model=pt_model,
+        example_inputs=lambda: (  # builds a fresh random tensor on every call
+            torch.randn(1, 3, 640, 640).to(memory_format=torch.channels_last),
+        ),
+    ),
+}
+
+
+@parametrize("test_case", test_cases)
+def test_dialect_yolo11(test_case):
+    """Track lowering progress: this model does not lower in the cortex-m backend yet."""
+    example_inputs = test_case.get_example_inputs()
+    # Expectations come from the two module-level op dicts.
+    CortexMTester(test_case.model, example_inputs).test_dialect(
+        ops_before_transforms,
+        ops_after_transforms,
+        qtol=10,
+    )
@@ -16,6 +16,7 @@
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/executor/pte_data_map.h>
 
+#include <cinttypes>
 #include <memory>
 #include <mutex>
 
@@ -41,6 +42,13 @@ using executorch::runtime::FreeableBuffer;
 using executorch::runtime::Result;
 using executorch::runtime::Span;
 
+// One mutex shared by XnnpackBackend::init(), execute() and destroy(); each
+// takes it on entry. This is temporary, tracked by T272407942.
+static std::mutex& global_xnnpack_mutex() {
+  static std::mutex instance;
+  return instance;
+}
+
 class XnnpackBackend final
     : public ::executorch::ET_RUNTIME_NAMESPACE::BackendInterface {
  public:
@@ -66,6 +74,8 @@ class XnnpackBackend final
       BackendInitContext& context,
       FreeableBuffer* processed,
       ArrayRef<CompileSpec> compile_specs) const override {
+    const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
+
     auto executor = context.get_runtime_allocator()
                         ->allocateInstance<xnnpack::delegate::XNNExecutor>();
     if (executor == nullptr) {
@@ -129,22 +139,45 @@ class XnnpackBackend final
           Error, "XNNCompiler::compileModel failed: 0x%x", (unsigned int)err);
       return err;
     }
+
+    ET_LOG(
+        Info,
+        "XnnpackBackend::init delegate=%p workspace_id=%" PRIu64
+        " workspace_ptr=%p program_id=0x%" PRIxPTR " weight_cache=%s",
+        (void*)executor,
+        workspace->id(),
+        (void*)workspace_ptr,
+        program_id,
+        use_weight_cache ? "true" : "false");
+
     return executor;
   }
 
   Error execute(
       BackendExecutionContext& context,
       DelegateHandle* handle,
       Span<EValue*> args) const override {
+    const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
+
     auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
 
+    auto workspace = executor->get_workspace();
+    ET_LOG(
+        Info,
+        "XnnpackBackend::execute begin delegate=%p workspace_id=%" PRIu64
+        " num_args=%zu weight_cache=%s",
+        (void*)executor,
+        workspace->id(),
+        (size_t)args.size(),
+        executor->uses_weight_cache() ? "true" : "false");
+
     std::unique_lock<std::mutex> lock_weights_cache(
         weights_cache_mutex_, std::defer_lock);
     if (executor->uses_weight_cache()) {
       lock_weights_cache.lock();
     }
 
-    auto [raii_lock, _] = executor->get_workspace()->acquire();
+    auto [raii_lock, _] = workspace->acquire();
 
     // Prepare Inputs/Outputs and Propagate Input Shapes
     Error err = executor->prepare_args(args);
@@ -161,12 +194,29 @@ class XnnpackBackend final
     // Convert output data types if necessary (e.g., int32 -> int64 for Long)
     err = executor->convert_outputs(args);
 
+    ET_LOG(
+        Info,
+        "XnnpackBackend::execute end delegate=%p workspace_id=%" PRIu64
+        " err=0x%x",
+        (void*)executor,
+        workspace->id(),
+        (unsigned int)err);
+
     return err;
   }
 
   void destroy(DelegateHandle* handle) const override {
     if (handle != nullptr) {
+      const std::lock_guard<std::mutex> global_lock(global_xnnpack_mutex());
+
       auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
+      auto workspace = executor->get_workspace();
+
+      ET_LOG(
+          Info,
+          "XnnpackBackend::destroy delegate=%p workspace_id=%" PRIu64,
+          (void*)executor,
+          workspace->id());
 
 #ifdef ENABLE_XNNPACK_PROFILING
       executor->print_avg_op_timings();
@@ -183,7 +233,6 @@ class XnnpackBackend final
       // the same backend instance. Make sure to hold onto the workspace
       // shared_ptr, as the pointer in the executor is freed, which includes
       // the mutex referenced by raii_lock.
-      auto workspace = executor->get_workspace();
       auto [raii_lock, _] = workspace->acquire();
 
       // XNNExecutor is not trivially destructible. Since this was constructed
 
@@ -47,13 +47,14 @@ non_fbcode_target(_kind = fb_android_library,
     name = "executorch_llama",
     warnings_as_errors = False,
     srcs = [
-        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.java",
+        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmCallback.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmGenerationConfig.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.kt",
+        "executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModuleConfig.kt",
     ],
     autoglob = False,
-    language = "JAVA",
+    language = "KOTLIN",
+    extra_kotlinc_arguments = ["-Xjvm-default=all"],
     deps = [
         ":executorch",
         "//fbandroid/java/com/facebook/jni:jni",
 
@@ -51,6 +51,7 @@ android {
     }
     kotlinOptions {
         jvmTarget = "11"
+        freeCompilerArgs += ["-Xjvm-default=all"]
     }
 }
 
 
@@ -6,45 +6,42 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-package org.pytorch.executorch.extension.llm;
+package org.pytorch.executorch.extension.llm
 
-import com.facebook.jni.annotations.DoNotStrip;
-import org.pytorch.executorch.annotations.Experimental;
+import com.facebook.jni.annotations.DoNotStrip
+import org.pytorch.executorch.annotations.Experimental
 
 /**
- * Callback interface for Llama model. Users can implement this interface to receive the generated
+ * Callback interface for Llm model. Users can implement this interface to receive the generated
  * tokens and statistics.
  *
- * <p>Warning: These APIs are experimental and subject to change without notice
+ * Warning: These APIs are experimental and subject to change without notice
  */
 @Experimental
-public interface LlmCallback {
+interface LlmCallback {
   /**
    * Called when a new result is available from JNI. Users will keep getting onResult() invocations
    * until generate() finishes.
    *
    * @param result Last generated token
    */
-  @DoNotStrip
-  public void onResult(String result);
+  @DoNotStrip fun onResult(result: String)
 
   /**
    * Called when the statistics for the generate() is available.
    *
-   * <p>The result will be a JSON string. See extension/llm/stats.h for the field definitions.
+   * The result will be a JSON string. See extension/llm/stats.h for the field definitions.
    *
    * @param stats JSON string containing the statistics for the generate()
    */
-  @DoNotStrip
-  default void onStats(String stats) {}
+  @DoNotStrip fun onStats(stats: String) {}
 
   /**
    * Called when an error occurs during generate().
    *
-   * @param errorCode Error code from the ExecuTorch runtime (see {@link
-   *     org.pytorch.executorch.ExecutorchRuntimeException})
+   * @param errorCode Error code from the ExecuTorch runtime (see
+   *   [org.pytorch.executorch.ExecutorchRuntimeException])
    * @param message Human-readable error description
    */
-  @DoNotStrip
-  default void onError(int errorCode, String message) {}
+  @DoNotStrip fun onError(errorCode: Int, message: String) {}
 }
Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,7 @@ android {`
`51`	`51`	`}`
`52`	`52`	`kotlinOptions {`
`53`	`53`	`jvmTarget = "11"`
	`54`	`+ freeCompilerArgs += ["-Xjvm-default=all"]`
`54`	`55`	`}`
`55`	`56`	`}`
`56`	`57`