Skip to content

Commit 99730ed

Browse files
committed
- Adding iterator for read-only buffers to optimize for sequential reads.
- Small changes for code review.
1 parent 72c050b commit 99730ed

3 files changed

Lines changed: 184 additions & 4 deletions

File tree

src/processing/byte_buffer.cpp

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,15 +134,15 @@ size_t ByteBuffer::EstimateOffsetsReserveCountFromSample(tcb::span<const uint8_t
134134
++sampled_elements;
135135
}
136136

137+
// If no elements were sampled or the cursor didn't move, it means the buffer is empty. So return 0.
137138
if (sampled_elements == 0 || cursor == 0)
138139
return 0;
139140

140141
// If sampling consumed the full buffer (<= sample window), we already know the exact count.
141142
if (cursor == bytes.size())
142143
return sampled_elements;
143144

144-
// Estimate total element count from sample density:
145-
// (sampled_elements / sampled_bytes) * total_bytes.
145+
// Estimate total element count from sample density: (sampled_elements / sampled_bytes) * total_bytes.
146146
// - sampled_elements / sampled_bytes gives "elements per byte" in the sampled prefix,
147147
// - then multiplying by total_bytes extrapolates a full-buffer estimate.
148148
const long double estimated =
@@ -151,11 +151,13 @@ size_t ByteBuffer::EstimateOffsetsReserveCountFromSample(tcb::span<const uint8_t
151151
const size_t estimated_count = static_cast<size_t>(std::ceil(estimated));
152152
const size_t estimated_with_headroom =
153153
static_cast<size_t>(std::ceil(static_cast<long double>(estimated_count) * 1.1L));
154+
155+
// If the count of sampled elements is more than the estimated, conservatively return actual count.
154156
return std::max(estimated_with_headroom, sampled_elements);
155157
}
156158

157159
// -----------------------------------------------------------------------------
158-
// Span reader methods
160+
// Element span reader methods
159161
// -----------------------------------------------------------------------------
160162

161163
size_t ByteBuffer::CalculateOffsetOfElement(size_t position) const {
@@ -189,6 +191,99 @@ tcb::span<const uint8_t> ByteBuffer::GetElement(size_t position) const {
189191
return elements_span_.subspan(offset + kSizePrefixBytes, element_size);
190192
}
191193

194+
// -----------------------------------------------------------------------------
195+
// Elemment span iterator
196+
//
197+
// Allows an alternative read of elements_span_ without need for lazy initialization of offsets_,
198+
// so saving execution time when the traversal of the buffer is strictly sequential.
199+
// This is the most common behavior when reading elements in single threaded mode.
200+
// -----------------------------------------------------------------------------
201+
202+
ByteBuffer::ConstIterator::ConstIterator(const ByteBuffer* buffer, size_t cursor_offset)
203+
: buffer_(buffer), cursor_offset_(cursor_offset) {}
204+
205+
void ByteBuffer::ConstIterator::ValidateFixedSizeElementAtCursor() const {
206+
if (buffer_->element_size_ <= 0) {
207+
throw InvalidInputException("Invalid fixed-size buffer: element_size must be greater than zero");
208+
}
209+
if ((buffer_->elements_span_.size() - cursor_offset_) < buffer_->element_size_) {
210+
throw InvalidInputException("Malformed fixed-size buffer: truncated element payload");
211+
}
212+
}
213+
214+
size_t ByteBuffer::ConstIterator::ReadAndValidateVariableElementSizeAtCursor() const {
215+
if ((buffer_->elements_span_.size() - cursor_offset_) < kSizePrefixBytes) {
216+
throw InvalidInputException("Malformed variable-size buffer: truncated length prefix");
217+
}
218+
const size_t current_element_size = ReadSizeAt(buffer_->elements_span_, cursor_offset_);
219+
const size_t payload_offset = cursor_offset_ + kSizePrefixBytes;
220+
if ((buffer_->elements_span_.size() - payload_offset) < current_element_size) {
221+
throw InvalidInputException("Malformed variable-size buffer: truncated element payload");
222+
}
223+
return current_element_size;
224+
}
225+
226+
ByteBuffer::ConstIterator::value_type ByteBuffer::ConstIterator::operator*() const {
227+
if (buffer_ == nullptr || cursor_offset_ >= buffer_->elements_span_.size()) {
228+
throw InvalidInputException("Cannot dereference ByteBuffer iterator at end position");
229+
}
230+
if (buffer_->has_fixed_sized_elements_) {
231+
ValidateFixedSizeElementAtCursor();
232+
return buffer_->elements_span_.subspan(cursor_offset_, buffer_->element_size_);
233+
}
234+
235+
const size_t current_element_size = ReadAndValidateVariableElementSizeAtCursor();
236+
const size_t payload_offset = cursor_offset_ + kSizePrefixBytes;
237+
return buffer_->elements_span_.subspan(payload_offset, current_element_size);
238+
}
239+
240+
ByteBuffer::ConstIterator& ByteBuffer::ConstIterator::operator++() {
241+
if (buffer_ == nullptr || cursor_offset_ >= buffer_->elements_span_.size()) {
242+
return *this;
243+
}
244+
if (buffer_->has_fixed_sized_elements_) {
245+
ValidateFixedSizeElementAtCursor();
246+
cursor_offset_ += buffer_->element_size_;
247+
return *this;
248+
}
249+
250+
const size_t current_element_size = ReadAndValidateVariableElementSizeAtCursor();
251+
cursor_offset_ += (kSizePrefixBytes + current_element_size);
252+
return *this;
253+
}
254+
255+
bool ByteBuffer::ConstIterator::operator==(const ConstIterator& other) const {
256+
return buffer_ == other.buffer_ && cursor_offset_ == other.cursor_offset_;
257+
}
258+
259+
bool ByteBuffer::ConstIterator::operator!=(const ConstIterator& other) const {
260+
return !(*this == other);
261+
}
262+
263+
void ByteBuffer::ValidateIteratorReadPreconditions() const {
264+
if (is_write_buffer_initialized_) {
265+
throw InvalidInputException("Iterator is only available for read buffers");
266+
}
267+
if (has_fixed_sized_elements_) {
268+
if (element_size_ <= 0) {
269+
throw InvalidInputException("Invalid fixed-size buffer: element_size must be greater than zero");
270+
}
271+
if ((elements_span_.size() % element_size_) != 0) {
272+
throw InvalidInputException("Malformed fixed-size buffer: buffer does not align with element_size");
273+
}
274+
}
275+
}
276+
277+
ByteBuffer::ConstIterator ByteBuffer::begin() const {
278+
ValidateIteratorReadPreconditions();
279+
return ConstIterator(this, 0u);
280+
}
281+
282+
ByteBuffer::ConstIterator ByteBuffer::end() const {
283+
ValidateIteratorReadPreconditions();
284+
return ConstIterator(this, elements_span_.size());
285+
}
286+
192287
// -----------------------------------------------------------------------------
193288
// Constructors and initializers for write buffer
194289
// -----------------------------------------------------------------------------

src/processing/byte_buffer.h

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include <cstddef>
2121
#include <cstdint>
22+
#include <iterator>
2223
#include <limits>
2324
#include <vector>
2425

@@ -57,13 +58,48 @@ class ByteBuffer {
5758
// Finalizes the write path and transfers the resulting buffer ownership.
5859
std::vector<uint8_t> FinalizeAndTakeBuffer();
5960

61+
// Iterator for read-only elements.
62+
class ConstIterator {
63+
public:
64+
// Iterator traits consumed indirectly by STL iterator machinery.
65+
using iterator_category = std::forward_iterator_tag;
66+
using value_type = tcb::span<const uint8_t>;
67+
using difference_type = std::ptrdiff_t;
68+
using pointer = void;
69+
using reference = value_type;
70+
71+
// Basic forward-iterator operations over encoded elements in elements_span_.
72+
ConstIterator(const ByteBuffer* buffer, size_t cursor_offset);
73+
value_type operator*() const;
74+
ConstIterator& operator++();
75+
bool operator==(const ConstIterator& other) const;
76+
bool operator!=(const ConstIterator& other) const;
77+
78+
private:
79+
void ValidateFixedSizeElementAtCursor() const;
80+
size_t ReadAndValidateVariableElementSizeAtCursor() const;
81+
82+
const ByteBuffer* buffer_ = nullptr;
83+
size_t cursor_offset_ = 0;
84+
};
85+
86+
// Methods used by the STL iterator machinery to iterate over the buffer.
87+
ConstIterator begin() const;
88+
ConstIterator end() const;
89+
6090
protected:
6191
// Helper for reserve heuristics in variable-size parsing.
6292
static size_t EstimateOffsetsReserveCountFromSample(tcb::span<const uint8_t> bytes);
6393

6494
// Helper for calculating the offset of an element by position.
6595
size_t CalculateOffsetOfElement(size_t position) const;
6696

97+
// Helper to validate the preconditions for reading the buffer with an iterator.
98+
void ValidateIteratorReadPreconditions() const;
99+
100+
// Helper to check if the buffer is initialized from a span.
101+
bool IsInitializedFromSpan() const { return is_initialized_from_span_; }
102+
67103
// Variables for span elements reading
68104
tcb::span<const uint8_t> elements_span_;
69105
mutable size_t num_elements_;
@@ -94,7 +130,10 @@ class ByteBuffer {
94130
bool is_write_buffer_finalized_ = false;
95131
};
96132

133+
// Constant to mark an offset value as unset.
97134
inline constexpr size_t kUnsetVariableElementOffset = std::numeric_limits<size_t>::max();
98-
inline constexpr size_t kSizePrefixBytes = sizeof(uint32_t); // [u32 size] prefix for variable-size elements.
135+
136+
// Constant for the size of the [u32 size] prefix for variable-size elements.
137+
inline constexpr size_t kSizePrefixBytes = sizeof(uint32_t);
99138

100139
} // namespace dbps::processing

src/processing/byte_buffer_test.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class ByteBufferTestProxy : public ByteBuffer {
5151
bool GetHasFixedSizedElements() const { return has_fixed_sized_elements_; }
5252
size_t GetElementSize() const { return element_size_; }
5353
const std::vector<size_t>& GetOffsets() const { return offsets_; }
54+
bool GetIsInitializedFromSpan() const { return IsInitializedFromSpan(); }
5455
const std::vector<uint8_t>& GetWriteBuffer() const { return write_buffer_; }
5556
void AppendTrailingBytesForTest(tcb::span<const uint8_t> bytes) {
5657
write_buffer_.insert(write_buffer_.end(), bytes.begin(), bytes.end());
@@ -207,6 +208,45 @@ TEST(ByteBufferTest, GetElement_VariableSize_ReturnsExpectedPayload) {
207208
EXPECT_EQ(second[6], 0x37);
208209
}
209210

211+
TEST(ByteBufferTest, Iterate_ReadOnlyFixedSize_TraversesInOrder) {
212+
std::vector<uint8_t> bytes = {0x10, 0x11, 0x20, 0x21, 0x30, 0x31};
213+
ByteBufferTestProxy buffer(tcb::span<const uint8_t>(bytes), 2);
214+
215+
std::vector<std::vector<uint8_t>> collected;
216+
for (const auto element : buffer) {
217+
collected.push_back(std::vector<uint8_t>(element.begin(), element.end()));
218+
}
219+
220+
ASSERT_EQ(collected.size(), 3u);
221+
EXPECT_EQ(collected[0], (std::vector<uint8_t>{0x10, 0x11}));
222+
EXPECT_EQ(collected[1], (std::vector<uint8_t>{0x20, 0x21}));
223+
EXPECT_EQ(collected[2], (std::vector<uint8_t>{0x30, 0x31}));
224+
EXPECT_TRUE(buffer.GetOffsets().empty());
225+
EXPECT_EQ(buffer.GetNumElements(), 0u);
226+
EXPECT_FALSE(buffer.GetIsInitializedFromSpan());
227+
}
228+
229+
TEST(ByteBufferTest, Iterate_ReadOnlyVariableSize_TraversesInOrder) {
230+
// [len=5]["ABCDE"][len=7]["1234567"]
231+
std::vector<uint8_t> bytes = {
232+
0x05, 0x00, 0x00, 0x00, 0x41, 0x42, 0x43, 0x44, 0x45,
233+
0x07, 0x00, 0x00, 0x00, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
234+
};
235+
ByteBufferTestProxy buffer{tcb::span<const uint8_t>(bytes)};
236+
237+
std::vector<std::vector<uint8_t>> collected;
238+
for (const auto element : buffer) {
239+
collected.push_back(std::vector<uint8_t>(element.begin(), element.end()));
240+
}
241+
242+
ASSERT_EQ(collected.size(), 2u);
243+
EXPECT_EQ(collected[0], (std::vector<uint8_t>{0x41, 0x42, 0x43, 0x44, 0x45}));
244+
EXPECT_EQ(collected[1], (std::vector<uint8_t>{0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37}));
245+
EXPECT_TRUE(buffer.GetOffsets().empty());
246+
EXPECT_EQ(buffer.GetNumElements(), 0u);
247+
EXPECT_FALSE(buffer.GetIsInitializedFromSpan());
248+
}
249+
210250
TEST(ByteBufferTest, GetElement_OutOfRange_Throws) {
211251
std::vector<uint8_t> bytes = {0x01, 0x02, 0x03, 0x04};
212252
ByteBufferTestProxy buffer(tcb::span<const uint8_t>(bytes), 2);
@@ -600,6 +640,12 @@ TEST(ByteBufferTest, SetElement_OnReadOnlyBuffer_Throws) {
600640
EXPECT_THROW((void)buffer.SetElement(0, tcb::span<const uint8_t>(replacement)), InvalidInputException);
601641
}
602642

643+
TEST(ByteBufferTest, Iterate_OnWriteBuffer_Throws) {
644+
ByteBufferTestProxy buffer(3u, 2u);
645+
EXPECT_THROW((void)buffer.begin(), InvalidInputException);
646+
EXPECT_THROW((void)buffer.end(), InvalidInputException);
647+
}
648+
603649
TEST(ByteBufferTest, ConstructVariableSize_EmptyBuffer_InitializesEmptyState) {
604650
std::vector<uint8_t> bytes;
605651
ByteBufferTestProxy buffer{tcb::span<const uint8_t>(bytes)};

0 commit comments

Comments
 (0)