Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
1ddbed9
TEMPORARY BRANCH FOR ZERO-COPY VERSION PERFORMANCE DEBUGGING
avalerio-tkd Mar 10, 2026
a997cce
- Timer switched to nanoseconds
avalerio-tkd Mar 10, 2026
f52a9e1
- Small optimization in GetRawElement for fixed-size elements
avalerio-tkd Mar 11, 2026
dec9c18
Merge branch 'main' into av_typelist_optimizing_079
avalerio-tkd Mar 11, 2026
2d551a2
- Updating to use GetWritableRawElement in the BasicXorEncryptor.
avalerio-tkd Mar 11, 2026
99d8c34
- Restored one lost comment in the BasicXorEncryptor.cpp
avalerio-tkd Mar 11, 2026
91b76db
- Adding streamlined iterator for typed buffers.
avalerio-tkd Mar 11, 2026
fb0566f
- Fixing unittests for streamlined iterator.
avalerio-tkd Mar 11, 2026
971903a
- Optimizing GetWritableRawElement for variable-size elements.
avalerio-tkd Mar 12, 2026
097d919
- Pushing small cleanups before pushing the Parquet-based num_element…
avalerio-tkd Mar 12, 2026
6d8944d
- Fixing issue with empty strings on iterator.
avalerio-tkd Mar 12, 2026
44357ac
- Added support for Parquet utils to read/decode num_elements from he…
avalerio-tkd Mar 13, 2026
2d7689a
- Propagate num_elements Parquet-based to TypedValuesBuffer and Basic…
avalerio-tkd Mar 13, 2026
ccfb079
- Fixed corner case in Parquet page v1 that didn't account for padded…
avalerio-tkd Mar 13, 2026
af827a0
- Added StringFixedSizedCodec and StringVariableSizedCodec to typed_b…
avalerio-tkd Mar 13, 2026
55c9baa
- Removing macOS hidden files.
avalerio-tkd Mar 13, 2026
3ad364d
- Removing Compression call from encryption sequencer final result (l…
avalerio-tkd Mar 16, 2026
f7c57e4
- Comment update.
avalerio-tkd Mar 16, 2026
145f85c
- Updating comments after code review.
avalerio-tkd Mar 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ _deps/

# macOS
.DS_Store
**/.!*.DS_Store

# Editor/IDE config
.vscode/
Expand Down
7 changes: 1 addition & 6 deletions src/client/dbps_api_client_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,14 @@
#include "tcb/span.hpp"
#include "dbps_api_client.h"
#include "http_client_base.h"
#include "../common/bytes_utils.h"
#include "../common/enums.h"
#include <nlohmann/json.hpp>
#include <gtest/gtest.h>

using namespace dbps::external;
using namespace dbps::enum_utils;

// TODO: Move this to a common test utility file.
// Helper function to convert string to binary data
// Copies every character of `str`, including embedded NULs, into a byte vector.
std::vector<uint8_t> StringToBytes(const std::string& str) {
    std::vector<uint8_t> bytes;
    bytes.assign(str.begin(), str.end());
    return bytes;
}

// Utility function to compare JSON strings, ignoring specified fields
bool CompareJsonStrings(const std::string& json1, const std::string& json2, const std::vector<std::string>& ignore_fields = {}) {
try {
Expand Down
18 changes: 18 additions & 0 deletions src/common/bytes_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ inline void write_u32_le_at(std::vector<uint8_t>& buf, size_t offset, uint32_t v
buf[offset + 3] = static_cast<uint8_t>((v >> 24) & 0xFF);
}

// Stores `v` into the four bytes starting at `p`, least-significant byte first.
// No bounds checking is performed: `p` must point to at least four writable bytes.
inline void write_u32_le(uint8_t* p, uint32_t v) {
    for (int byte_index = 0; byte_index < 4; ++byte_index) {
        p[byte_index] = static_cast<uint8_t>(v >> (8 * byte_index));
    }
}

inline uint32_t read_u32_le(const std::vector<uint8_t>& in, size_t offset) {
return static_cast<uint32_t>(in[offset]) |
(static_cast<uint32_t>(in[offset + 1]) << 8) |
Expand All @@ -95,6 +102,12 @@ inline uint32_t read_u32_le(tcb::span<const uint8_t> in, size_t offset) {
(static_cast<uint32_t>(in[offset + 3]) << 24);
}

// Reads a 32-bit unsigned integer stored least-significant byte first at `p`.
// The value is assembled byte by byte (instead of memcpy-ing into a uint32_t) so the
// result does not depend on the host byte order and agrees with the other
// read_u32_le overloads in this header.
// No bounds checking is performed: `p` must point to at least four readable bytes.
inline uint32_t read_u32_le(const uint8_t* p) {
    return static_cast<uint32_t>(p[0]) |
           (static_cast<uint32_t>(p[1]) << 8) |
           (static_cast<uint32_t>(p[2]) << 16) |
           (static_cast<uint32_t>(p[3]) << 24);
}

// Utility functions for splitting and joining byte vectors.

struct BytesPair {
Expand Down Expand Up @@ -289,3 +302,8 @@ inline std::string AddStringAttribute(
out[key] = value;
return value;
}

// Returns the raw bytes of `str` as a vector, one element per character.
// Embedded NUL characters and bytes >= 0x80 are copied through unchanged.
inline std::vector<uint8_t> StringToBytes(const std::string& str) {
    std::vector<uint8_t> bytes;
    bytes.reserve(str.size());
    for (const char ch : str) {
        bytes.push_back(static_cast<uint8_t>(ch));
    }
    return bytes;
}
Comment on lines +307 to +309

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should have a test for this function.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

45 changes: 45 additions & 0 deletions src/common/bytes_utils_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,4 +239,49 @@ TEST(BytesUtils, AttributesMap_AddBool) {

std::map<std::string, std::string> bad_attrs{{"page_v2_is_compressed", "maybe"}};
EXPECT_THROW(AddBoolAttribute(out, bad_attrs, "page_v2_is_compressed"), InvalidInputException);
}

// Plain ASCII text maps to one byte per character, in order.
TEST(BytesUtils, StringToBytes_AsciiText) {
    const std::vector<uint8_t> expected_bytes{'d', 'b', 'p', 's'};
    const std::vector<uint8_t> actual_bytes = StringToBytes(std::string("dbps"));

    EXPECT_EQ(expected_bytes, actual_bytes);
}

// An empty string yields an empty byte vector.
TEST(BytesUtils, StringToBytes_EmptyString) {
    const std::vector<uint8_t> bytes = StringToBytes(std::string{});

    EXPECT_TRUE(bytes.empty());
}

// Embedded NULs and non-ASCII bytes (0xFF, 0x80) must pass through unchanged,
// and the conversion must not stop at the first NUL.
TEST(BytesUtils, StringToBytes_PreservesRawBytesAndNulls) {
    const std::vector<uint8_t> expected = {
        'D', 'B', 'P', 'S', 0x00, 'X', 'Y', 0xFF, 0x80, 0x00, 'Z'};

    // Build the input from the same byte sequence, one character at a time.
    std::string input;
    for (const uint8_t byte : expected) {
        input.push_back(static_cast<char>(byte));
    }

    const std::vector<uint8_t> result = StringToBytes(input);

    EXPECT_EQ(expected, result);
}
14 changes: 7 additions & 7 deletions src/common/dbpa_local_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ TEST_F(LocalDataBatchProtectionAgentTest, SuccessfulEncryption) {
Type::BYTE_ARRAY, std::nullopt, CompressionCodec::UNCOMPRESSED, std::nullopt));

std::vector<uint8_t> test_data = BuildByteArrayValueBytesForTesting("test_ABC");
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}, {"dict_page_num_values", "1"}};
auto result = agent.Encrypt(test_data, encoding_attributes);

ASSERT_NE(result, nullptr);
Expand All @@ -74,7 +74,7 @@ TEST_F(LocalDataBatchProtectionAgentTest, SuccessfulEncryptionCompressedDictiona
0x03, 0x32, 0x92, 0x12, 0xF3, 0x80, 0x10, 0x00, 0xC7, 0xB8,
0x50, 0xFC, 0x13, 0x00, 0x00, 0x00
};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}, {"dict_page_num_values", "1"}};
auto result = agent.Encrypt(test_data_gzip, encoding_attributes);

ASSERT_NE(result, nullptr);
Expand All @@ -93,7 +93,7 @@ TEST_F(LocalDataBatchProtectionAgentTest, SuccessfulDecryption) {
Type::BYTE_ARRAY, std::nullopt, CompressionCodec::UNCOMPRESSED, DBPS_ENCRYPTION_METADATA));

std::vector<uint8_t> test_data = BuildByteArrayValueBytesForTesting("test_EFG");
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}, {"dict_page_num_values", "1"}};
auto result = agent.Decrypt(test_data, encoding_attributes);

ASSERT_NE(result, nullptr);
Expand All @@ -113,7 +113,7 @@ TEST_F(LocalDataBatchProtectionAgentTest, RoundTripEncryptDecrypt) {

// Original data to encrypt
std::vector<uint8_t> original_data = BuildByteArrayValueBytesForTesting("roundtrip_XYZ");
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}, {"dict_page_num_values", "1"}};

// Encrypt the data
auto encrypt_result = encrypt_agent.Encrypt(original_data, encoding_attributes);
Expand Down Expand Up @@ -156,7 +156,7 @@ TEST_F(LocalDataBatchProtectionAgentTest, EncryptWithoutInit) {
LocalDataBatchProtectionAgent agent;

std::vector<uint8_t> test_data = {1, 2, 3, 4};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}, {"dict_page_num_values", "1"}};
auto result = agent.Encrypt(test_data, encoding_attributes);

ASSERT_NE(result, nullptr);
Expand All @@ -171,7 +171,7 @@ TEST_F(LocalDataBatchProtectionAgentTest, DecryptWithoutInit) {
LocalDataBatchProtectionAgent agent;

std::vector<uint8_t> test_data = {1, 2, 3, 4};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}};
std::map<std::string, std::string> encoding_attributes = {{"page_encoding", "PLAIN"}, {"page_type", "DICTIONARY_PAGE"}, {"dict_page_num_values", "1"}};
auto result = agent.Decrypt(test_data, encoding_attributes);

ASSERT_NE(result, nullptr);
Expand Down Expand Up @@ -203,7 +203,7 @@ TEST_F(LocalDataBatchProtectionAgentTest, MissingPageEncoding) {
Type::BYTE_ARRAY, std::nullopt, CompressionCodec::UNCOMPRESSED, std::nullopt));

std::vector<uint8_t> test_data = {1, 2, 3, 4};
std::map<std::string, std::string> encoding_attributes = {{"page_type", "DICTIONARY_PAGE"}};
std::map<std::string, std::string> encoding_attributes = {{"page_type", "DICTIONARY_PAGE"}, {"dict_page_num_values", "1"}};
auto result = agent.Encrypt(test_data, encoding_attributes);

ASSERT_NE(result, nullptr);
Expand Down
27 changes: 13 additions & 14 deletions src/processing/encryption_sequencer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "../common/exceptions.h"
#include "encryptors/basic_xor_encryptor.h"
#include <functional>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <optional>
Expand Down Expand Up @@ -138,19 +137,20 @@ bool DataBatchEncryptionSequencer::DecodeAndEncrypt(tcb::span<const uint8_t> pla
*/
try {
// Decompress and split plaintext into level and value bytes
auto [level_bytes, value_bytes] = DecompressAndSplit(
auto [level_bytes, value_bytes, num_elements] = DecompressAndSplit(
plaintext, compression_, encoding_attributes_converted_);

// Parse value bytes into typed values buffer
auto typed_buffer = ReinterpretValueBytesAsTypedValuesBuffer(value_bytes, datatype_, datatype_length_, encoding_);
auto typed_buffer = ReinterpretValueBytesAsTypedValuesBuffer(
value_bytes, num_elements, datatype_, datatype_length_, encoding_);

// Encrypt the typed values buffer and level bytes, then join them into a single encrypted byte vector.
auto encrypted_value_bytes = encryptor_->EncryptValueList(typed_buffer);
auto encrypted_level_bytes = encryptor_->EncryptBlock(level_bytes);
auto joined_encrypted_bytes = JoinWithLengthPrefix(encrypted_level_bytes, encrypted_value_bytes);

// Compress the joined encrypted bytes
encrypted_result_ = Compress(joined_encrypted_bytes, encrypted_compression_);

// Encrypted payloads typically compress poorly, so compressing them yields little or no size reduction and can even increase the size.
// Therefore, the final joined encrypted bytes are returned as-is, without compression.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this comment may end up generating more questions than answering them (i.e. the reviewers of the final version may not have context of the version where we did have compression). I'd suggest removing or rephrasing

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, removing it then.

encrypted_result_ = JoinWithLengthPrefix(encrypted_level_bytes, encrypted_value_bytes);

// Set the encryption type to per-value
encryption_metadata_[encryption_mode_key] = ENCRYPTION_MODE_PER_VALUE;
Expand Down Expand Up @@ -226,16 +226,15 @@ bool DataBatchEncryptionSequencer::DecryptAndEncode(tcb::span<const uint8_t> cip
error_message_ = "Failed to get encryption_mode from encryption_metadata";
return false;
}
std::string encryption_mode = encryption_mode_opt.value();
const std::string& encryption_mode = encryption_mode_opt.value();

// Per-value encryption
if (encryption_mode == ENCRYPTION_MODE_PER_VALUE) {
// Decompress the encrypted bytes
auto decompressed_encrypted_bytes = Decompress(ciphertext, encrypted_compression_);


// Split the joined encrypted bytes, then decrypt the level and value bytes separately.
auto [encrypted_level_bytes, encrypted_value_bytes] =
SplitWithLengthPrefix(tcb::span<const uint8_t>(decompressed_encrypted_bytes));
// The ciphertext payload is already the joined bytes without compression.
auto [encrypted_level_bytes, encrypted_value_bytes] = SplitWithLengthPrefix(ciphertext);

auto level_bytes = encryptor_->DecryptBlock(encrypted_level_bytes);
auto typed_buffer = encryptor_->DecryptValueList(encrypted_value_bytes);

Expand Down Expand Up @@ -294,7 +293,7 @@ bool DataBatchEncryptionSequencer::ConvertEncodingAttributesToValues() {
add_int("page_v2_num_nulls");
add_bool("page_v2_is_compressed");
} else if (page_type == "DICTIONARY_PAGE") {
// DICTIONARY_PAGE has no specific encoding attributes
add_int("dict_page_num_values");
} else {
throw InvalidInputException("Unexpected page type: " + page_type);
}
Expand Down
Loading
Loading