Skip to content

Commit e1af3f7

Browse files
committed
[ET Device Support] Add device tensor helper functions to TensorPtr API
Pull Request resolved: #18761 Add clone_tensor_ptr_to_device and clone_tensor_ptr_to_cpu to tensor_ptr.h for cloning tensors between host and device memory via DeviceAllocatorRegistry. Extend the existing make_tensor_ptr(const TensorPtr&, ...) overload with optional device_type/device_index parameters (default CPU/0) for seamless device placement. ghstack-source-id: 381929818 @exported-using-ghexport Differential Revision: [D99913077](https://our.internmc.facebook.com/intern/diff/D99913077/)
1 parent 406abaf commit e1af3f7

7 files changed

Lines changed: 711 additions & 29 deletions

File tree

extension/tensor/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def define_common_targets():
2424
],
2525
visibility = ["PUBLIC"],
2626
deps = [
27+
"//executorch/runtime/core:device_allocator",
2728
"//executorch/runtime/core/exec_aten/util:dim_order_util" + aten_suffix,
2829
"//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
2930
],

extension/tensor/tensor_ptr.cpp

Lines changed: 138 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <c10/util/safe_numerics.h>
1414

15+
#include <executorch/runtime/core/device_allocator.h>
1516
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
1617

1718
namespace executorch {
@@ -25,6 +26,9 @@ namespace {
2526
* ensures that they are managed together and have the same lifetime as the
2627
* Tensor. When the Tensor is destroyed, the Storage structure ensures
2728
* proper cleanup of the associated metadata and data if needed.
29+
*
30+
* For device tensors, the data pointer points to device memory; the deleter
31+
* is responsible for freeing it through the appropriate DeviceAllocator.
2832
*/
2933
struct Storage final {
3034
executorch::aten::TensorImpl tensor_impl;
@@ -47,6 +51,11 @@ struct Storage final {
4751
strides(std::move(strides)),
4852
deleter(std::move(deleter)) {}
4953

54+
Storage(const Storage&) = delete;
55+
Storage& operator=(const Storage&) = delete;
56+
Storage(Storage&&) = delete;
57+
Storage& operator=(Storage&&) = delete;
58+
5059
~Storage() {
5160
if (deleter) {
5261
deleter(tensor_impl.mutable_data());
@@ -63,7 +72,9 @@ TensorPtr make_tensor_ptr(
6372
std::vector<executorch::aten::StridesType> strides,
6473
executorch::aten::ScalarType type,
6574
executorch::aten::TensorShapeDynamism dynamism,
66-
std::function<void(void*)> deleter) {
75+
std::function<void(void*)> deleter,
76+
runtime::etensor::DeviceType device_type,
77+
runtime::etensor::DeviceIndex device_index) {
6778
const auto dim = sizes.size();
6879
ET_CHECK_MSG(
6980
dim_order.empty() || dim_order.size() == dim,
@@ -111,17 +122,25 @@ TensorPtr make_tensor_ptr(
111122
data,
112123
dim_order.data(),
113124
strides.data(),
114-
dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC);
125+
dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC,
126+
device_type,
127+
device_index);
115128
auto storage = std::make_shared<Storage>(
116129
std::move(tensor_impl),
117130
std::move(sizes),
118131
std::move(dim_order),
119132
std::move(strides),
120133
std::move(deleter));
121-
const auto tensor_ptr = &storage->tensor;
134+
const auto raw_tensor_ptr = &storage->tensor;
122135
return std::shared_ptr<executorch::aten::Tensor>(
123-
std::move(storage), tensor_ptr);
136+
std::move(storage), raw_tensor_ptr);
124137
#else
138+
ET_CHECK_MSG(
139+
device_type == runtime::etensor::DeviceType::CPU,
140+
"USE_ATEN_LIB build does not support non-CPU device tensors via make_tensor_ptr; "
141+
"got device_type=%d. Use the ExecuTorch portable build for device tensor support.",
142+
static_cast<int>(device_type));
143+
(void)device_index;
125144
auto options = c10::TensorOptions()
126145
.dtype(c10::scalarTypeToTypeMeta(type))
127146
.device(c10::kCPU);
@@ -271,5 +290,120 @@ runtime::Error resize_tensor_ptr(
271290
sizes.data(), sizes.size()));
272291
}
273292

293+
// ---- Device tensor helpers ----
294+
//
295+
// These helpers are only meaningful in the ExecuTorch portable build.
296+
// USE_ATEN_LIB cannot create on-device tensors via make_tensor_ptr, so cloning
297+
// to/from a device tensor is intentionally unsupported in that build.
298+
299+
#ifndef USE_ATEN_LIB
300+
301+
/**
 * Clones a CPU tensor into freshly allocated memory on a non-CPU device.
 *
 * The device allocator is looked up with runtime::get_device_allocator() for
 * `device_type`. The source bytes are copied with copy_host_to_device(), and
 * the returned TensorPtr carries a deleter that hands the buffer back to the
 * same allocator via deallocate().
 *
 * Sizes, dim order, strides, scalar type and shape dynamism are copied from
 * the source tensor.
 *
 * Every precondition is enforced with ET_CHECK_MSG:
 *  - `cpu_tensor` is non-null, resides on CPU and has a non-null data pointer;
 *  - `device_type` is not CPU;
 *  - an allocator is registered for `device_type`;
 *  - the device allocation and the host-to-device copy both succeed.
 *
 * @param cpu_tensor Source tensor whose data lives in host memory.
 * @param device_type Target device type; must not be CPU.
 * @param device_index Index of the target device, forwarded to the allocator.
 * @return A TensorPtr whose data pointer refers to the new device buffer.
 */
TensorPtr clone_tensor_ptr_to_device(
    const TensorPtr& cpu_tensor,
    runtime::etensor::DeviceType device_type,
    runtime::etensor::DeviceIndex device_index) {
  // Dereferencing an empty TensorPtr below would be undefined behavior.
  ET_CHECK_MSG(cpu_tensor != nullptr, "Source tensor must not be null.");
  ET_CHECK_MSG(
      device_type != runtime::etensor::DeviceType::CPU,
      "Target device must not be CPU; use clone_tensor_ptr for CPU-to-CPU copies.");
  // Counterpart of the source-device guard in clone_tensor_ptr_to_cpu: the
  // data pointer is handed to copy_host_to_device() as host memory, so a
  // tensor that is already on a device must be rejected up front.
  ET_CHECK_MSG(
      cpu_tensor->unsafeGetTensorImpl()->device_type() ==
          runtime::etensor::DeviceType::CPU,
      "Source tensor must be on CPU.");

  auto* allocator = runtime::get_device_allocator(device_type);
  ET_CHECK_MSG(
      allocator != nullptr,
      "No device allocator registered for device type %d",
      static_cast<int>(device_type));

  const auto nbytes = cpu_tensor->nbytes();
  const auto* cpu_data = cpu_tensor->const_data_ptr();
  ET_CHECK_MSG(cpu_data != nullptr, "Source tensor has no data.");

  auto result = allocator->allocate(nbytes, device_index);
  ET_CHECK_MSG(result.ok(), "Failed to allocate device memory.");
  void* device_data = result.get();

  auto err = allocator->copy_host_to_device(
      device_data, cpu_data, nbytes, device_index);
  ET_CHECK_MSG(err == runtime::Error::Ok, "Host-to-device copy failed.");

  // make_tensor_ptr takes ownership of the metadata, so copy it out of the
  // source tensor's views first.
  std::vector<executorch::aten::SizesType> sizes(
      cpu_tensor->sizes().begin(), cpu_tensor->sizes().end());
  std::vector<executorch::aten::DimOrderType> dim_order(
      cpu_tensor->dim_order().begin(), cpu_tensor->dim_order().end());
  std::vector<executorch::aten::StridesType> strides(
      cpu_tensor->strides().begin(), cpu_tensor->strides().end());

  return make_tensor_ptr(
      std::move(sizes),
      device_data,
      std::move(dim_order),
      std::move(strides),
      cpu_tensor->scalar_type(),
      cpu_tensor->shape_dynamism(),
      // The allocator is captured as a raw pointer.
      // NOTE(review): assumes the registered allocator outlives every tensor
      // created here - confirm against the registry's lifetime.
      [allocator, device_index](void* ptr) {
        allocator->deallocate(ptr, device_index);
      },
      device_type,
      device_index);
}
347+
348+
/**
 * Clones a device tensor into a new tensor backed by host memory.
 *
 * The device type and index are read from the source tensor's TensorImpl. The
 * matching allocator is looked up with runtime::get_device_allocator(), and
 * the bytes are copied with copy_device_to_host() into a std::vector<uint8_t>
 * that is then moved into make_tensor_ptr().
 *
 * Sizes, dim order, strides, scalar type and shape dynamism are copied from
 * the source tensor, matching what clone_tensor_ptr_to_device preserves.
 *
 * Every precondition is enforced with ET_CHECK_MSG:
 *  - `device_tensor` is non-null and has a non-null data pointer;
 *  - the source tensor is not already on CPU;
 *  - an allocator is registered for the source device type;
 *  - the device-to-host copy succeeds.
 *
 * @param device_tensor Source tensor whose data lives in device memory.
 * @return A TensorPtr that owns a host copy of the tensor data.
 */
TensorPtr clone_tensor_ptr_to_cpu(const TensorPtr& device_tensor) {
  // Dereferencing an empty TensorPtr below would be undefined behavior.
  ET_CHECK_MSG(device_tensor != nullptr, "Source tensor must not be null.");

  const auto nbytes = device_tensor->nbytes();
  const auto* device_data = device_tensor->const_data_ptr();
  ET_CHECK_MSG(device_data != nullptr, "Source device tensor has no data.");

  const auto* impl = device_tensor->unsafeGetTensorImpl();
  const auto device_type = impl->device_type();
  const auto device_index = impl->device_index();
  ET_CHECK_MSG(
      device_type != runtime::etensor::DeviceType::CPU,
      "Source tensor is already on CPU.");

  auto* allocator = runtime::get_device_allocator(device_type);
  ET_CHECK_MSG(
      allocator != nullptr,
      "No device allocator registered for device type %d",
      static_cast<int>(device_type));

  // Host staging buffer; ownership moves into the returned tensor below.
  std::vector<uint8_t> cpu_data(nbytes);

  auto err = allocator->copy_device_to_host(
      cpu_data.data(), device_data, nbytes, device_index);
  ET_CHECK_MSG(err == runtime::Error::Ok, "Device-to-host copy failed.");

  std::vector<executorch::aten::SizesType> sizes(
      device_tensor->sizes().begin(), device_tensor->sizes().end());
  std::vector<executorch::aten::DimOrderType> dim_order(
      device_tensor->dim_order().begin(), device_tensor->dim_order().end());
  std::vector<executorch::aten::StridesType> strides(
      device_tensor->strides().begin(), device_tensor->strides().end());

  // Forward the source's shape dynamism explicitly. Without it the clone
  // would fall back to make_tensor_ptr's default dynamism and could differ
  // from the source, e.g. a STATIC tensor would come back resizable.
  return make_tensor_ptr(
      std::move(sizes),
      std::move(cpu_data),
      std::move(dim_order),
      std::move(strides),
      device_tensor->scalar_type(),
      device_tensor->shape_dynamism());
}
386+
387+
#else // USE_ATEN_LIB
388+
389+
/**
 * USE_ATEN_LIB variant of clone_tensor_ptr_to_device.
 *
 * All three arguments are ignored; the function unconditionally fails an
 * ET_CHECK_MSG, because this build cannot produce on-device tensors through
 * make_tensor_ptr.
 */
TensorPtr clone_tensor_ptr_to_device(
    const TensorPtr&,
    runtime::etensor::DeviceType,
    runtime::etensor::DeviceIndex) {
  ET_CHECK_MSG(
      false,
      "clone_tensor_ptr_to_device is not supported in USE_ATEN_LIB builds; "
      "make_tensor_ptr cannot create on-device aten tensors.");
}
398+
399+
/**
 * USE_ATEN_LIB variant of clone_tensor_ptr_to_cpu.
 *
 * The argument is ignored; the function unconditionally fails an
 * ET_CHECK_MSG, because this build cannot produce on-device tensors through
 * make_tensor_ptr and so has nothing to copy back.
 */
TensorPtr clone_tensor_ptr_to_cpu(const TensorPtr&) {
  ET_CHECK_MSG(
      false,
      "clone_tensor_ptr_to_cpu is not supported in USE_ATEN_LIB builds; "
      "make_tensor_ptr cannot create on-device aten tensors.");
}
405+
406+
#endif // USE_ATEN_LIB
407+
274408
} // namespace extension
275409
} // namespace executorch

0 commit comments

Comments
 (0)