1212
1313#include < c10/util/safe_numerics.h>
1414
15+ #include < executorch/runtime/core/device_allocator.h>
1516#include < executorch/runtime/core/exec_aten/util/tensor_util.h>
1617
1718namespace executorch {
@@ -25,6 +26,9 @@ namespace {
2526 * ensures that they are managed together and have the same lifetime as the
2627 * Tensor. When the Tensor is destroyed, the Storage structure ensures
2728 * proper cleanup of the associated metadata and data if needed.
29+ *
30+ * For device tensors, the data pointer points to device memory; the deleter
31+ * is responsible for freeing it through the appropriate DeviceAllocator.
2832 */
2933struct Storage final {
3034 executorch::aten::TensorImpl tensor_impl;
@@ -47,6 +51,11 @@ struct Storage final {
4751 strides(std::move(strides)),
4852 deleter(std::move(deleter)) {}
4953
54+ Storage (const Storage&) = delete;
55+ Storage& operator =(const Storage&) = delete ;
56+ Storage (Storage&&) = delete;
57+ Storage& operator =(Storage&&) = delete ;
58+
5059 ~Storage () {
5160 if (deleter) {
5261 deleter (tensor_impl.mutable_data ());
@@ -63,7 +72,9 @@ TensorPtr make_tensor_ptr(
6372 std::vector<executorch::aten::StridesType> strides,
6473 executorch::aten::ScalarType type,
6574 executorch::aten::TensorShapeDynamism dynamism,
66- std::function<void (void *)> deleter) {
75+ std::function<void (void *)> deleter,
76+ runtime::etensor::DeviceType device_type,
77+ runtime::etensor::DeviceIndex device_index) {
6778 const auto dim = sizes.size ();
6879 ET_CHECK_MSG (
6980 dim_order.empty () || dim_order.size () == dim,
@@ -111,17 +122,25 @@ TensorPtr make_tensor_ptr(
111122 data,
112123 dim_order.data (),
113124 strides.data (),
114- dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC );
125+ dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC ,
126+ device_type,
127+ device_index);
115128 auto storage = std::make_shared<Storage>(
116129 std::move (tensor_impl),
117130 std::move (sizes),
118131 std::move (dim_order),
119132 std::move (strides),
120133 std::move (deleter));
121- const auto tensor_ptr = &storage->tensor ;
134+ const auto raw_tensor_ptr = &storage->tensor ;
122135 return std::shared_ptr<executorch::aten::Tensor>(
123- std::move (storage), tensor_ptr );
136+ std::move (storage), raw_tensor_ptr );
124137#else
138+ ET_CHECK_MSG (
139+ device_type == runtime::etensor::DeviceType::CPU ,
140+ " USE_ATEN_LIB build does not support non-CPU device tensors via make_tensor_ptr; "
141+ " got device_type=%d. Use the ExecuTorch portable build for device tensor support." ,
142+ static_cast <int >(device_type));
143+ (void )device_index;
125144 auto options = c10::TensorOptions ()
126145 .dtype (c10::scalarTypeToTypeMeta (type))
127146 .device (c10::kCPU );
@@ -271,5 +290,120 @@ runtime::Error resize_tensor_ptr(
271290 sizes.data (), sizes.size ()));
272291}
273292
293+ // ---- Device tensor helpers ----
294+ //
295+ // These helpers are only meaningful in the ExecuTorch portable build.
296+ // USE_ATEN_LIB cannot create on-device tensors via make_tensor_ptr, so cloning
297+ // to/from a device tensor is intentionally unsupported in that build.
298+
299+ #ifndef USE_ATEN_LIB
300+
301+ TensorPtr clone_tensor_ptr_to_device (
302+ const TensorPtr& cpu_tensor,
303+ runtime::etensor::DeviceType device_type,
304+ runtime::etensor::DeviceIndex device_index) {
305+ ET_CHECK_MSG (
306+ device_type != runtime::etensor::DeviceType::CPU ,
307+ " Target device must not be CPU; use clone_tensor_ptr for CPU-to-CPU copies." );
308+
309+ auto * allocator = runtime::get_device_allocator (device_type);
310+ ET_CHECK_MSG (
311+ allocator != nullptr ,
312+ " No device allocator registered for device type %d" ,
313+ static_cast <int >(device_type));
314+
315+ const auto nbytes = cpu_tensor->nbytes ();
316+ const auto * cpu_data = cpu_tensor->const_data_ptr ();
317+ ET_CHECK_MSG (cpu_data != nullptr , " Source tensor has no data." );
318+
319+ auto result = allocator->allocate (nbytes, device_index);
320+ ET_CHECK_MSG (result.ok (), " Failed to allocate device memory." );
321+ void * device_data = result.get ();
322+
323+ auto err = allocator->copy_host_to_device (
324+ device_data, cpu_data, nbytes, device_index);
325+ ET_CHECK_MSG (err == runtime::Error::Ok, " Host-to-device copy failed." );
326+
327+ std::vector<executorch::aten::SizesType> sizes (
328+ cpu_tensor->sizes ().begin (), cpu_tensor->sizes ().end ());
329+ std::vector<executorch::aten::DimOrderType> dim_order (
330+ cpu_tensor->dim_order ().begin (), cpu_tensor->dim_order ().end ());
331+ std::vector<executorch::aten::StridesType> strides (
332+ cpu_tensor->strides ().begin (), cpu_tensor->strides ().end ());
333+
334+ return make_tensor_ptr (
335+ std::move (sizes),
336+ device_data,
337+ std::move (dim_order),
338+ std::move (strides),
339+ cpu_tensor->scalar_type (),
340+ cpu_tensor->shape_dynamism (),
341+ [allocator, device_index](void * ptr) {
342+ allocator->deallocate (ptr, device_index);
343+ },
344+ device_type,
345+ device_index);
346+ }
347+
348+ TensorPtr clone_tensor_ptr_to_cpu (const TensorPtr& device_tensor) {
349+ const auto nbytes = device_tensor->nbytes ();
350+ const auto * device_data = device_tensor->const_data_ptr ();
351+ ET_CHECK_MSG (device_data != nullptr , " Source device tensor has no data." );
352+
353+ const auto device_type = device_tensor->unsafeGetTensorImpl ()->device_type ();
354+ const auto device_index =
355+ device_tensor->unsafeGetTensorImpl ()->device_index ();
356+ ET_CHECK_MSG (
357+ device_type != runtime::etensor::DeviceType::CPU ,
358+ " Source tensor is already on CPU." );
359+
360+ auto * allocator = runtime::get_device_allocator (device_type);
361+ ET_CHECK_MSG (
362+ allocator != nullptr ,
363+ " No device allocator registered for device type %d" ,
364+ static_cast <int >(device_type));
365+
366+ std::vector<uint8_t > cpu_data (nbytes);
367+
368+ auto err = allocator->copy_device_to_host (
369+ cpu_data.data (), device_data, nbytes, device_index);
370+ ET_CHECK_MSG (err == runtime::Error::Ok, " Device-to-host copy failed." );
371+
372+ std::vector<executorch::aten::SizesType> sizes (
373+ device_tensor->sizes ().begin (), device_tensor->sizes ().end ());
374+ std::vector<executorch::aten::DimOrderType> dim_order (
375+ device_tensor->dim_order ().begin (), device_tensor->dim_order ().end ());
376+ std::vector<executorch::aten::StridesType> strides (
377+ device_tensor->strides ().begin (), device_tensor->strides ().end ());
378+
379+ return make_tensor_ptr (
380+ std::move (sizes),
381+ std::move (cpu_data),
382+ std::move (dim_order),
383+ std::move (strides),
384+ device_tensor->scalar_type ());
385+ }
386+
387+ #else // USE_ATEN_LIB
388+
389+ TensorPtr clone_tensor_ptr_to_device (
390+ const TensorPtr& /* cpu_tensor*/ ,
391+ runtime::etensor::DeviceType /* device_type*/ ,
392+ runtime::etensor::DeviceIndex /* device_index*/ ) {
393+ ET_CHECK_MSG (
394+ false ,
395+ " clone_tensor_ptr_to_device is not supported in USE_ATEN_LIB builds; "
396+ " make_tensor_ptr cannot create on-device aten tensors." );
397+ }
398+
399+ TensorPtr clone_tensor_ptr_to_cpu (const TensorPtr& /* device_tensor*/ ) {
400+ ET_CHECK_MSG (
401+ false ,
402+ " clone_tensor_ptr_to_cpu is not supported in USE_ATEN_LIB builds; "
403+ " make_tensor_ptr cannot create on-device aten tensors." );
404+ }
405+
406+ #endif // USE_ATEN_LIB
407+
274408} // namespace extension
275409} // namespace executorch
0 commit comments