Skip to content

Commit 6d965bc

Browse files
committed
[ET Device Support] DeviceMemoryBuffer RAII class for device memory lifetime management
Pull Request resolved: #18473

Introduces DeviceMemoryBuffer, an RAII wrapper that owns a single device memory allocation. On destruction, it automatically calls DeviceAllocator::deallocate() to free the memory. This mirrors the role of std::vector<uint8_t> for CPU planned buffers, but for non-CPU device memory (CUDA, etc.).

Key features:
- Static factory create(size, type, index) looks up the DeviceAllocator from the registry
- Move-only semantics (no copy) to enforce single ownership
- as_span() accessor wraps the device pointer for use with HierarchicalAllocator
- Destructor is a no-op for default-constructed or moved-from instances

ghstack-source-id: 381283216
@exported-using-ghexport

Differential Revision: [D97850709](https://our.internmc.facebook.com/intern/diff/D97850709/)
1 parent f37836b commit 6d965bc

6 files changed

Lines changed: 372 additions & 0 deletions

File tree

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/runtime/core/device_memory_buffer.h>
10+
11+
namespace executorch::runtime {
12+
13+
/**
 * Allocates `size` bytes of device memory and wraps the allocation in an
 * owning DeviceMemoryBuffer.
 *
 * @param size Number of bytes to allocate.
 * @param type Device type used to look up the registered DeviceAllocator.
 * @param index Device index forwarded to the allocator and remembered for
 *     the later deallocate() call.
 * @param alignment Requested alignment in bytes; must be a non-zero power
 *     of 2.
 * @return The owning buffer on success. Otherwise:
 *     - Error::InvalidArgument if `alignment` is not a power of 2;
 *     - Error::NotFound if no allocator is registered for `type`;
 *     - the allocator's own error if allocate() fails;
 *     - Error::MemoryAllocationFailed if allocate() reports success but
 *       returns a null pointer for a non-empty request.
 */
Result<DeviceMemoryBuffer> DeviceMemoryBuffer::create(
    size_t size,
    etensor::DeviceType type,
    etensor::DeviceIndex index,
    size_t alignment) {
  // The public contract requires a power-of-2 alignment; reject anything
  // else up front instead of relying on every allocator backend to check.
  if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
    ET_LOG(Error, "Alignment must be a non-zero power of 2");
    return Error::InvalidArgument;
  }

  DeviceAllocator* allocator = get_device_allocator(type);
  if (allocator == nullptr) {
    ET_LOG(
        Error,
        "No device allocator registered for device type %d",
        static_cast<int>(type));
    return Error::NotFound;
  }

  auto result = allocator->allocate(size, index, alignment);
  if (!result.ok()) {
    ET_LOG(
        Error,
        "Device allocation failed for device type %d, index %d",
        static_cast<int>(type),
        static_cast<int>(index));
    return result.error();
  }

  // A null pointer paired with a non-zero size would produce a buffer whose
  // span claims `size` bytes starting at address 0 and that is never
  // deallocated, so treat it as an allocation failure.
  void* ptr = result.get();
  if (ptr == nullptr && size > 0) {
    ET_LOG(
        Error,
        "Device allocator for device type %d returned a null pointer",
        static_cast<int>(type));
    return Error::MemoryAllocationFailed;
  }

  return DeviceMemoryBuffer(ptr, size, allocator, index);
}
34+
35+
} // namespace executorch::runtime
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <cstddef>
12+
#include <cstdint>
13+
14+
#include <executorch/runtime/core/device_allocator.h>
15+
#include <executorch/runtime/core/result.h>
16+
#include <executorch/runtime/core/span.h>
17+
18+
namespace executorch::runtime {
19+
20+
/**
21+
* RAII wrapper that owns a single device memory allocation.
22+
*
23+
* On destruction, calls DeviceAllocator::deallocate() to free the memory.
24+
* This mirrors the role of std::vector<uint8_t> for CPU planned buffers,
25+
* but for device memory (CUDA, etc.).
26+
*
27+
* Move-only: cannot be copied, but can be moved to transfer ownership.
28+
*/
29+
class DeviceMemoryBuffer final {
30+
public:
31+
/**
32+
* Creates a DeviceMemoryBuffer by allocating device memory.
33+
*
34+
* Looks up the DeviceAllocator for the given device type via the
35+
* DeviceAllocatorRegistry. If no allocator is registered for the type,
36+
* returns Error::NotFound.
37+
*
38+
* @param size Number of bytes to allocate.
39+
* @param type The device type (e.g., CUDA).
40+
* @param index The device index (e.g., 0 for cuda:0).
41+
* @param alignment Minimum alignment of the returned pointer in bytes.
42+
* Must be a power of 2. Defaults to DeviceAllocator::kDefaultAlignment.
43+
* @return A Result containing the DeviceMemoryBuffer on success, or an error.
44+
*/
45+
static Result<DeviceMemoryBuffer> create(
46+
size_t size,
47+
etensor::DeviceType type,
48+
etensor::DeviceIndex index = 0,
49+
size_t alignment = DeviceAllocator::kDefaultAlignment);
50+
51+
DeviceMemoryBuffer() = default;
52+
53+
~DeviceMemoryBuffer() {
54+
if (ptr_ != nullptr && allocator_ != nullptr) {
55+
allocator_->deallocate(ptr_, device_index_);
56+
}
57+
}
58+
59+
// Move constructor: transfer ownership.
60+
DeviceMemoryBuffer(DeviceMemoryBuffer&& other) noexcept
61+
: ptr_(other.ptr_),
62+
size_(other.size_),
63+
allocator_(other.allocator_),
64+
device_index_(other.device_index_) {
65+
other.ptr_ = nullptr;
66+
other.size_ = 0;
67+
other.allocator_ = nullptr;
68+
}
69+
70+
// Move assignment: release current, take ownership.
71+
DeviceMemoryBuffer& operator=(DeviceMemoryBuffer&& other) noexcept {
72+
if (this != &other) {
73+
if (ptr_ != nullptr && allocator_ != nullptr) {
74+
allocator_->deallocate(ptr_, device_index_);
75+
}
76+
ptr_ = other.ptr_;
77+
size_ = other.size_;
78+
allocator_ = other.allocator_;
79+
device_index_ = other.device_index_;
80+
other.ptr_ = nullptr;
81+
other.size_ = 0;
82+
other.allocator_ = nullptr;
83+
}
84+
return *this;
85+
}
86+
87+
// Non-copyable.
88+
DeviceMemoryBuffer(const DeviceMemoryBuffer&) = delete;
89+
DeviceMemoryBuffer& operator=(const DeviceMemoryBuffer&) = delete;
90+
91+
/// Returns the device pointer, or nullptr if empty/moved-from.
92+
void* data() const {
93+
return ptr_;
94+
}
95+
96+
/// Returns the size in bytes of the allocation.
97+
size_t size() const {
98+
return size_;
99+
}
100+
101+
/**
102+
* Returns a Span<uint8_t> wrapping the device pointer.
103+
*
104+
* This is intended for use with HierarchicalAllocator, which only performs
105+
* pointer arithmetic on the span data and never dereferences it. Device
106+
* pointers are valid for pointer arithmetic from the CPU side.
107+
*/
108+
Span<uint8_t> as_span() const {
109+
return {static_cast<uint8_t*>(ptr_), size_};
110+
}
111+
112+
private:
113+
DeviceMemoryBuffer(
114+
void* ptr,
115+
size_t size,
116+
DeviceAllocator* allocator,
117+
etensor::DeviceIndex device_index)
118+
: ptr_(ptr),
119+
size_(size),
120+
allocator_(allocator),
121+
device_index_(device_index) {}
122+
123+
void* ptr_ = nullptr;
124+
size_t size_ = 0;
125+
DeviceAllocator* allocator_ = nullptr;
126+
etensor::DeviceIndex device_index_ = 0;
127+
};
128+
129+
} // namespace executorch::runtime

runtime/core/portable_type/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def define_common_targets():
2828
"//executorch/extension/fb/dynamic_shim/...",
2929
"//executorch/kernels/portable/cpu/...",
3030
"//executorch/runtime/core:device_allocator",
31+
"//executorch/runtime/core/...",
3132
"//executorch/runtime/core/exec_aten/...",
3233
"//executorch/runtime/core/portable_type/test/...",
3334
],

runtime/core/targets.bzl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,17 @@ def define_common_targets():
155155
visibility = ["//executorch/..."],
156156
)
157157

158+
runtime.cxx_library(
159+
name = "device_memory_buffer",
160+
srcs = ["device_memory_buffer.cpp"],
161+
exported_headers = ["device_memory_buffer.h"],
162+
exported_deps = [
163+
":core",
164+
":device_allocator",
165+
],
166+
visibility = ["PUBLIC"],
167+
)
168+
158169
runtime.cxx_library(
159170
name = "tag",
160171
srcs = ["tag.cpp"],
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/runtime/core/device_memory_buffer.h>
10+
11+
#include <gtest/gtest.h>
12+
13+
#include <executorch/runtime/platform/runtime.h>
14+
15+
using executorch::runtime::DeviceAllocator;
16+
using executorch::runtime::DeviceMemoryBuffer;
17+
using executorch::runtime::Error;
18+
using executorch::runtime::get_device_allocator;
19+
using executorch::runtime::register_device_allocator;
20+
using executorch::runtime::Result;
21+
using executorch::runtime::etensor::DeviceIndex;
22+
using executorch::runtime::etensor::DeviceType;
23+
24+
/**
25+
* A mock DeviceAllocator for testing DeviceMemoryBuffer.
26+
* Returns pointers into a local buffer and tracks call counts.
27+
*/
28+
class MockAllocator : public DeviceAllocator {
29+
public:
30+
explicit MockAllocator(DeviceType type) : type_(type) {}
31+
32+
Result<void*> allocate(
33+
size_t nbytes,
34+
DeviceIndex index,
35+
size_t alignment = DeviceAllocator::kDefaultAlignment) override {
36+
allocate_count_++;
37+
last_allocate_size_ = nbytes;
38+
last_allocate_alignment_ = alignment;
39+
return static_cast<void*>(buffer_);
40+
}
41+
42+
void deallocate(void* ptr, DeviceIndex index) override {
43+
deallocate_count_++;
44+
last_deallocate_ptr_ = ptr;
45+
}
46+
47+
Error copy_host_to_device(
48+
void* dst,
49+
const void* src,
50+
size_t nbytes,
51+
DeviceIndex index) override {
52+
return Error::Ok;
53+
}
54+
55+
Error copy_device_to_host(
56+
void* dst,
57+
const void* src,
58+
size_t nbytes,
59+
DeviceIndex index) override {
60+
return Error::Ok;
61+
}
62+
63+
DeviceType device_type() const override {
64+
return type_;
65+
}
66+
67+
int allocate_count_ = 0;
68+
int deallocate_count_ = 0;
69+
size_t last_allocate_size_ = 0;
70+
size_t last_allocate_alignment_ = 0;
71+
void* last_deallocate_ptr_ = nullptr;
72+
uint8_t buffer_[256] = {};
73+
74+
private:
75+
DeviceType type_;
76+
};
77+
78+
// Global mock registered once before all tests run.
79+
static MockAllocator g_mock_cuda(DeviceType::CUDA);
80+
81+
class DeviceMemoryBufferTest : public ::testing::Test {
82+
protected:
83+
static void SetUpTestSuite() {
84+
executorch::runtime::runtime_init();
85+
register_device_allocator(&g_mock_cuda);
86+
}
87+
88+
void SetUp() override {
89+
// Reset counters before each test.
90+
g_mock_cuda.allocate_count_ = 0;
91+
g_mock_cuda.deallocate_count_ = 0;
92+
g_mock_cuda.last_allocate_size_ = 0;
93+
g_mock_cuda.last_allocate_alignment_ = 0;
94+
g_mock_cuda.last_deallocate_ptr_ = nullptr;
95+
}
96+
};
97+
98+
// A default-constructed buffer holds no pointer and reports zero size, both
// directly and through as_span().
TEST_F(DeviceMemoryBufferTest, DefaultConstructedIsEmpty) {
  DeviceMemoryBuffer empty_buffer;

  EXPECT_EQ(empty_buffer.size(), 0);
  EXPECT_EQ(empty_buffer.data(), nullptr);

  auto empty_span = empty_buffer.as_span();
  EXPECT_EQ(empty_span.size(), 0);
  EXPECT_EQ(empty_span.data(), nullptr);
}
107+
108+
// create() performs one allocation of the requested size; leaving the scope
// returns that pointer to the allocator exactly once.
TEST_F(DeviceMemoryBufferTest, CreateAllocatesAndDestructorDeallocates) {
  {
    auto created = DeviceMemoryBuffer::create(1024, DeviceType::CUDA, 0);
    ASSERT_TRUE(created.ok());

    auto owner = std::move(created.get());
    EXPECT_NE(owner.data(), nullptr);
    EXPECT_EQ(owner.size(), 1024);

    // Allocation happened once and nothing has been freed yet.
    EXPECT_EQ(g_mock_cuda.allocate_count_, 1);
    EXPECT_EQ(g_mock_cuda.last_allocate_size_, 1024);
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
  }

  EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
  EXPECT_EQ(g_mock_cuda.last_deallocate_ptr_, g_mock_cuda.buffer_);
}
123+
124+
// No allocator is registered for DeviceType::CPU in this test binary, so
// create() must report Error::NotFound.
TEST_F(DeviceMemoryBufferTest, CreateFailsWithNoRegisteredAllocator) {
  auto missing = DeviceMemoryBuffer::create(512, DeviceType::CPU, 0);

  EXPECT_FALSE(missing.ok());
  EXPECT_EQ(missing.error(), Error::NotFound);
}
129+
130+
// Move construction hands the allocation to the new object, empties the
// source, and the allocation is freed exactly once when everything goes out
// of scope.
TEST_F(DeviceMemoryBufferTest, MoveConstructorTransfersOwnership) {
  {
    auto result = DeviceMemoryBuffer::create(256, DeviceType::CUDA, 0);
    ASSERT_TRUE(result.ok());
    auto original = std::move(result.get());
    void* original_ptr = original.data();

    DeviceMemoryBuffer moved(std::move(original));

    EXPECT_EQ(original.data(), nullptr);
    EXPECT_EQ(original.size(), 0);
    EXPECT_EQ(moved.data(), original_ptr);
    EXPECT_EQ(moved.size(), 256);
    // Transferring ownership must not free anything.
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
  }

  // Three objects were destroyed (result, original, moved) but only `moved`
  // owned the allocation, so exactly one deallocate() is expected. A higher
  // count would indicate a double free from a moved-from object.
  EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
  EXPECT_EQ(g_mock_cuda.last_deallocate_ptr_, g_mock_cuda.buffer_);
}
144+
145+
// Move assignment into an empty buffer hands over the allocation, empties
// the source, and the allocation is freed exactly once at scope exit.
TEST_F(DeviceMemoryBufferTest, MoveAssignmentTransfersOwnership) {
  {
    auto result = DeviceMemoryBuffer::create(128, DeviceType::CUDA, 0);
    ASSERT_TRUE(result.ok());
    auto original = std::move(result.get());
    void* original_ptr = original.data();

    DeviceMemoryBuffer target;
    target = std::move(original);

    EXPECT_EQ(original.data(), nullptr);
    EXPECT_EQ(original.size(), 0);
    EXPECT_EQ(target.data(), original_ptr);
    EXPECT_EQ(target.size(), 128);
    // The target was empty, so the assignment had nothing to release.
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
  }

  // Only `target` owned memory when the scope ended.
  EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
  EXPECT_EQ(g_mock_cuda.last_deallocate_ptr_, g_mock_cuda.buffer_);
}

// Move assignment into a buffer that already owns memory must release the
// old allocation first, then adopt the new one.
TEST_F(DeviceMemoryBufferTest, MoveAssignmentReleasesExistingAllocation) {
  {
    auto first = DeviceMemoryBuffer::create(64, DeviceType::CUDA, 0);
    ASSERT_TRUE(first.ok());
    auto second = DeviceMemoryBuffer::create(32, DeviceType::CUDA, 0);
    ASSERT_TRUE(second.ok());

    auto target = std::move(first.get());
    auto source = std::move(second.get());
    ASSERT_EQ(g_mock_cuda.deallocate_count_, 0);

    target = std::move(source);

    // The allocation previously held by `target` is released immediately.
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
    EXPECT_EQ(target.size(), 32);
    EXPECT_NE(target.data(), nullptr);
    EXPECT_EQ(source.data(), nullptr);
    EXPECT_EQ(source.size(), 0);
  }

  // The adopted allocation is released when `target` is destroyed.
  EXPECT_EQ(g_mock_cuda.deallocate_count_, 2);
}
159+
160+
// Destroying a buffer that never owned memory must not call deallocate().
TEST_F(DeviceMemoryBufferTest, DestructorNoOpForDefaultConstructed) {
  {
    DeviceMemoryBuffer never_allocated;
  }

  EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
}
164+
165+
// as_span() reports the same address as data() and the size passed to
// create().
TEST_F(DeviceMemoryBufferTest, AsSpanWrapsDevicePointer) {
  auto created = DeviceMemoryBuffer::create(2048, DeviceType::CUDA, 0);
  ASSERT_TRUE(created.ok());
  auto owner = std::move(created.get());

  auto view = owner.as_span();

  EXPECT_EQ(view.size(), 2048);
  EXPECT_EQ(view.data(), static_cast<uint8_t*>(owner.data()));
}
174+
175+
// Omitting the alignment argument forwards DeviceAllocator::kDefaultAlignment
// to the allocator.
TEST_F(DeviceMemoryBufferTest, CreateUsesDefaultAlignmentWhenUnspecified) {
  auto created = DeviceMemoryBuffer::create(1024, DeviceType::CUDA, 0);
  ASSERT_TRUE(created.ok());

  EXPECT_EQ(
      g_mock_cuda.last_allocate_alignment_, DeviceAllocator::kDefaultAlignment);
}
181+
182+
// An explicitly supplied alignment reaches the allocator unchanged.
TEST_F(DeviceMemoryBufferTest, CreateForwardsCustomAlignmentToAllocator) {
  constexpr size_t kCustomAlignment = 512;

  auto created =
      DeviceMemoryBuffer::create(1024, DeviceType::CUDA, 0, kCustomAlignment);

  ASSERT_TRUE(created.ok());
  EXPECT_EQ(g_mock_cuda.last_allocate_alignment_, kCustomAlignment);
}

runtime/core/test/targets.bzl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ def define_common_targets():
77
TARGETS and BUCK files that call this function.
88
"""
99

10+
runtime.cxx_test(
11+
name = "device_memory_buffer_test",
12+
srcs = ["device_memory_buffer_test.cpp"],
13+
deps = [
14+
"//executorch/runtime/core:device_memory_buffer",
15+
],
16+
)
17+
1018
runtime.cxx_test(
1119
name = "span_test",
1220
srcs = ["span_test.cpp"],

0 commit comments

Comments
 (0)