Skip to content

Commit 0378fc4

Browse files
authored
Cross-load packed weight cache reuse for XNNPACK (pytorch#19988)
Differential Revision: D106717093
Pull Request resolved: pytorch#19988
1 parent 96a64ec commit 0378fc4

7 files changed

Lines changed: 999 additions & 121 deletions

File tree

backends/xnnpack/runtime/XNNPACKBackend.cpp

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -95,16 +95,14 @@ class XnnpackBackend final
9595
// concurrent inits from resetting is_finalized_ or overwriting
9696
// named_data_map_ while compileModel is using the shared weights cache.
9797
std::unique_lock<std::mutex> lock_weights_cache(
98-
weights_cache_mutex_, std::defer_lock);
98+
options_.weights_cache_mutex(), std::defer_lock);
9999
if (use_weight_cache) {
100100
lock_weights_cache.lock();
101101

102102
const auto& cache_path = options_.get_packed_cache_path();
103-
if (!cache_path.empty()) {
104-
weights_cache_->set_packed_cache_path(cache_path);
105-
}
103+
options_.weights_cache().set_packed_cache_path(cache_path);
106104

107-
weights_cache_->initialize_for_runtime(
105+
options_.weights_cache().initialize_for_runtime(
108106
context.get_runtime_allocator(), named_data_map);
109107
workspace->set_uses_weight_cache();
110108
}
@@ -120,7 +118,7 @@ class XnnpackBackend final
120118
processed->data(),
121119
processed->size(),
122120
executor,
123-
weights_cache_.get(),
121+
&options_.weights_cache(),
124122
workspace_ptr,
125123
named_data_map,
126124
use_weight_cache);
@@ -149,7 +147,7 @@ class XnnpackBackend final
149147
auto workspace = executor->get_workspace();
150148

151149
std::unique_lock<std::mutex> lock_weights_cache(
152-
weights_cache_mutex_, std::defer_lock);
150+
options_.weights_cache_mutex(), std::defer_lock);
153151
if (executor->uses_weight_cache() || workspace->uses_weight_cache()) {
154152
lock_weights_cache.lock();
155153
}
@@ -180,14 +178,15 @@ class XnnpackBackend final
180178
auto workspace = executor->get_workspace();
181179

182180
const std::lock_guard<std::mutex> lock_weights_cache(
183-
weights_cache_mutex_);
181+
options_.weights_cache_mutex());
184182

185183
#ifdef ENABLE_XNNPACK_PROFILING
186184
executor->print_avg_op_timings();
187185
#endif
188186

189187
if (executor->uses_weight_cache()) {
190-
weights_cache_->delete_packed_data(executor->get_packed_data_names());
188+
options_.weights_cache().delete_packed_data(
189+
executor->get_packed_data_names());
191190
}
192191

193192
// This is needed to serialize access to xnn_delete_runtime which is not
@@ -218,27 +217,29 @@ class XnnpackBackend final
218217
Error set_option(
219218
BackendOptionContext& context,
220219
const Span<BackendOption>& backend_options) override {
220+
// Process every option even if one fails — applying a `packed_cache_path`
221+
// and triggering `save_weight_cache_on_disk` in the same array must not
222+
// depend on declaration order. Capture the first error and report it
223+
// after the loop. All option-key dispatch — including the disk-save
224+
// side effect — lives inside XnnpackBackendOptions::set_option, which
225+
// owns the weights-cache instance and its mutex.
226+
Error first_err = Error::Ok;
221227
for (const auto& option : backend_options) {
222228
Error err = options_.set_option(option);
223-
if (err != Error::Ok) {
224-
return err;
229+
if (err != Error::Ok && first_err == Error::Ok) {
230+
first_err = err;
225231
}
226232
}
227-
return Error::Ok;
233+
return first_err;
228234
}
229235

230236
private:
231237
mutable xnnpack::XnnpackBackendOptions options_;
232238

233-
// Weights cache is global to all delegate instances.
234-
mutable std::mutex weights_cache_mutex_;
235-
std::unique_ptr<XNNWeightsCache> weights_cache_ =
236-
std::make_unique<XNNWeightsCache>();
237-
238-
// Lock Hiearchy for Mutexes:
239-
// weights_cache_mutex_
240-
// workspace_meta_mutex_
241-
// workspace_mutex_ (owned by executor)
239+
// Lock hierarchy for mutexes:
240+
// options_.weights_cache_mutex()
241+
// workspace_meta_mutex_
242+
// workspace_mutex_ (owned by executor)
242243
};
243244

244245
namespace {

backends/xnnpack/runtime/XNNPACKBackend.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@ const char weight_cache_option_key[] = "weight_cache_enabled";
2020
// @lint-ignore CLANGTIDY facebook-hte-CArray
2121
const char packed_cache_path_option_key[] = "packed_cache_path";
2222

23+
/// EXPERIMENTAL — option name and semantics may change without notice.
24+
///
25+
/// Setting this to `true` triggers persisting the packed weight cache to disk
26+
/// so a subsequent process load can mmap the same file and skip XNNPACK weight
27+
/// repacking. The on-disk path is configured via
28+
/// `packed_cache_path_option_key`. The disk write is a one-shot side effect
29+
/// (the value is not stored): every `true` set fires another save.
30+
// Must remain a C array for the BackendOptions template overloads.
31+
// @lint-ignore CLANGTIDY facebook-hte-CArray
32+
const char save_weight_cache_on_disk_option_key[] = "save_weight_cache_on_disk";
33+
2334
/// Workspace sharing mode. This is a backend option that can be set via the
2435
/// set_option API to control memory sharing between CALL_DELEGATE instances.
2536
/// This is useful for reducing memory consumption.

0 commit comments

Comments
 (0)