pytorch
diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp
Lines changed: 22 additions & 21 deletions
diff --git a/backends/xnnpack/runtime/XNNPACKBackend.h b/backends/xnnpack/runtime/XNNPACKBackend.h
Lines changed: 11 additions & 0 deletions
@@ -95,16 +95,14 @@ class XnnpackBackend final
     // concurrent inits from resetting is_finalized_ or overwriting
     // named_data_map_ while compileModel is using the shared weights cache.
     std::unique_lock<std::mutex> lock_weights_cache(
-        weights_cache_mutex_, std::defer_lock);
+        options_.weights_cache_mutex(), std::defer_lock);
     if (use_weight_cache) {
       lock_weights_cache.lock();
 
       const auto& cache_path = options_.get_packed_cache_path();
-      if (!cache_path.empty()) {
-        weights_cache_->set_packed_cache_path(cache_path);
-      }
+      options_.weights_cache().set_packed_cache_path(cache_path);
 
-      weights_cache_->initialize_for_runtime(
+      options_.weights_cache().initialize_for_runtime(
           context.get_runtime_allocator(), named_data_map);
       workspace->set_uses_weight_cache();
     }
@@ -120,7 +118,7 @@ class XnnpackBackend final
         processed->data(),
         processed->size(),
         executor,
-        weights_cache_.get(),
+        &options_.weights_cache(),
         workspace_ptr,
         named_data_map,
         use_weight_cache);
@@ -149,7 +147,7 @@ class XnnpackBackend final
     auto workspace = executor->get_workspace();
 
     std::unique_lock<std::mutex> lock_weights_cache(
-        weights_cache_mutex_, std::defer_lock);
+        options_.weights_cache_mutex(), std::defer_lock);
     if (executor->uses_weight_cache() || workspace->uses_weight_cache()) {
       lock_weights_cache.lock();
     }
@@ -180,14 +178,15 @@ class XnnpackBackend final
       auto workspace = executor->get_workspace();
 
       const std::lock_guard<std::mutex> lock_weights_cache(
-          weights_cache_mutex_);
+          options_.weights_cache_mutex());
 
 #ifdef ENABLE_XNNPACK_PROFILING
       executor->print_avg_op_timings();
 #endif
 
       if (executor->uses_weight_cache()) {
-        weights_cache_->delete_packed_data(executor->get_packed_data_names());
+        options_.weights_cache().delete_packed_data(
+            executor->get_packed_data_names());
       }
 
       // This is needed to serialize access to xnn_delete_runtime which is not
@@ -218,27 +217,29 @@ class XnnpackBackend final
   Error set_option(
       BackendOptionContext& context,
       const Span<BackendOption>& backend_options) override {
+    // Process every option even if one fails — applying a `packed_cache_path`
+    // and triggering `save_weight_cache_on_disk` in the same array must not
+    // depend on declaration order. Capture the first error and report it
+    // after the loop. All option-key dispatch — including the disk-save
+    // side effect — lives inside XnnpackBackendOptions::set_option, which
+    // owns the weights-cache instance and its mutex.
+    Error first_err = Error::Ok;
     for (const auto& option : backend_options) {
       Error err = options_.set_option(option);
-      if (err != Error::Ok) {
-        return err;
+      if (err != Error::Ok && first_err == Error::Ok) {
+        first_err = err;
       }
     }
-    return Error::Ok;
+    return first_err;
   }
 
  private:
   mutable xnnpack::XnnpackBackendOptions options_;
 
-  // Weights cache is global to all delegate instances.
-  mutable std::mutex weights_cache_mutex_;
-  std::unique_ptr<XNNWeightsCache> weights_cache_ =
-      std::make_unique<XNNWeightsCache>();
-
-  // Lock Hiearchy for Mutexes:
-  // weights_cache_mutex_
-  // workspace_meta_mutex_
-  // workspace_mutex_ (owned by executor)
+  // Lock hierarchy for mutexes:
+  //   options_.weights_cache_mutex()
+  //   workspace_meta_mutex_
+  //   workspace_mutex_ (owned by executor)
 };
 
 namespace {
 
@@ -20,6 +20,17 @@ const char weight_cache_option_key[] = "weight_cache_enabled";
 // @lint-ignore CLANGTIDY facebook-hte-CArray
 const char packed_cache_path_option_key[] = "packed_cache_path";
 
+/// EXPERIMENTAL — option name and semantics may change without notice.
+///
+/// Setting this to `true` triggers persisting the packed weight cache to disk
+/// so a subsequent process load can mmap the same file and skip XNNPACK weight
+/// repacking. The on-disk path is configured via
+/// `packed_cache_path_option_key`. The disk write is a side effect of the set
+/// call itself (the value is not stored): each `true` set triggers a new save.
+// Must remain a C array for the BackendOptions template overloads.
+// @lint-ignore CLANGTIDY facebook-hte-CArray
+const char save_weight_cache_on_disk_option_key[] = "save_weight_cache_on_disk";
+
 /// Workspace sharing mode. This is a backend option that can be set via the
 /// set_option API to control memory sharing between CALL_DELEGATE instances.
 /// This is useful for reducing memory consumption.