-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathencryption_sequencer.cpp
More file actions
370 lines (322 loc) · 15.3 KB
/
Copy pathencryption_sequencer.cpp
File metadata and controls
370 lines (322 loc) · 15.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "encryption_sequencer.h"
#include "enum_utils.h"
#include "parquet_utils.h"
#include "../common/bytes_utils.h"
#include "compression_utils.h"
#include "../common/exceptions.h"
#include "encryptors/basic_xor_encryptor.h"
#include <functional>
#include <iostream>
#include <sstream>
#include <optional>
#include <cassert>
#include <cstring>
#include <memory>
using namespace dbps::external;
using namespace dbps::enum_utils;
using namespace dbps::compression;
namespace {
// File-local string constants used as keys and values of the encryption metadata map.

// Metadata key under which the agent version is written on encryption and looked up on decryption.
constexpr const char* DBPS_VERSION_KEY = "dbps_agent_version";
// Version string written on encryption; on decryption the stored value must start with it.
constexpr const char* DBPS_VERSION = "v0.01";
// Metadata keys that hold the encryption mode. The dictionary-page key is used when the page type
// is "DICTIONARY_PAGE"; the data-page key is used for every other page type.
constexpr const char* ENCRYPTION_MODE_KEY_DICTIONARY_PAGE = "encrypt_mode_dict_page";
constexpr const char* ENCRYPTION_MODE_KEY_DATA_PAGE = "encrypt_mode_data_page";
// The only two encryption mode values accepted under the mode keys above.
constexpr const char* ENCRYPTION_MODE_PER_BLOCK = "per_block";
constexpr const char* ENCRYPTION_MODE_PER_VALUE = "per_value";
}
// Factory for the encryptor used when the caller does not supply one.
// Always builds a BasicXorEncryptor from the given identifiers and datatype, and hands
// ownership to the caller through the DBPSEncryptor base pointer.
static std::unique_ptr<DBPSEncryptor> CreateEncryptor(
    const std::string& key_id,
    const std::string& column_name,
    const std::string& user_id,
    const std::string& application_context,
    Type::type datatype) {
    std::unique_ptr<DBPSEncryptor> encryptor = std::make_unique<BasicXorEncryptor>(
        key_id, column_name, user_id, application_context, datatype);
    return encryptor;
}
// Constructor that builds its own encryptor.
//
// Delegates to the overload that accepts a pre-built encryptor, so the member-initializer list
// exists in exactly one place and the two constructors cannot drift apart. The encryptor is
// created by CreateEncryptor from key_id, column_name, user_id, application_context and datatype.
//
// Parameters are forwarded unchanged:
//   column_name, datatype, datatype_length  - column identity and type information
//   compression, encoding                   - codec and encoding of the incoming page payload
//   encoding_attributes                     - raw string attributes, converted later during validation
//   encrypted_compression                   - stored in encrypted_compression_
//   key_id, user_id, application_context    - identifiers also passed to the encryptor
//   encryption_metadata                     - initial metadata map (read on decrypt, written on encrypt)
DataBatchEncryptionSequencer::DataBatchEncryptionSequencer(
    const std::string& column_name,
    Type::type datatype,
    const std::optional<int>& datatype_length,
    CompressionCodec::type compression,
    Encoding::type encoding,
    const std::map<std::string, std::string>& encoding_attributes,
    CompressionCodec::type encrypted_compression,
    const std::string& key_id,
    const std::string& user_id,
    const std::string& application_context,
    const std::map<std::string, std::string>& encryption_metadata
) : DataBatchEncryptionSequencer(
        column_name,
        datatype,
        datatype_length,
        compression,
        encoding,
        encoding_attributes,
        encrypted_compression,
        key_id,
        user_id,
        application_context,
        encryption_metadata,
        CreateEncryptor(key_id, column_name, user_id, application_context, datatype)) {}
// Constructor with pre-built encryptor.
//
// Copies every argument into the member of the same name (with a trailing underscore) and takes
// ownership of `encryptor` by moving it into encryptor_. No validation happens here; parameter
// checks run later in ValidateParameters().
//
// NOTE(review): `encryptor` is not checked for null. DecodeAndEncrypt and DecryptAndEncode
// dereference encryptor_ unconditionally, so callers presumably must pass a non-null pointer -
// confirm against the header/callers.
DataBatchEncryptionSequencer::DataBatchEncryptionSequencer(
const std::string& column_name,
Type::type datatype,
const std::optional<int>& datatype_length,
CompressionCodec::type compression,
Encoding::type encoding,
const std::map<std::string, std::string>& encoding_attributes,
CompressionCodec::type encrypted_compression,
const std::string& key_id,
const std::string& user_id,
const std::string& application_context,
const std::map<std::string, std::string>& encryption_metadata,
std::unique_ptr<DBPSEncryptor> encryptor
) : column_name_(column_name),
datatype_(datatype),
datatype_length_(datatype_length),
compression_(compression),
encoding_(encoding),
encoding_attributes_(encoding_attributes),
encrypted_compression_(encrypted_compression),
key_id_(key_id),
user_id_(user_id),
application_context_(application_context),
encryption_metadata_(encryption_metadata),
// Ownership of the caller-supplied encryptor is transferred to this object.
encryptor_(std::move(encryptor)) {}
// Top level encryption/decryption methods.
// Encrypts one page payload, preferring per-value encryption and falling back to per-block.
//
// Returns false, with error_stage_ and error_message_ set, when ValidateParameters() fails, when
// `plaintext` is empty, or when the per-block fallback yields an empty result. On success the
// ciphertext is stored in encrypted_result_, and encryption_metadata_ records both the mode that
// was used and the agent version.
//
// Exceptions: a DBPSUnsupportedException is re-thrown when it occurs even though the compression,
// encoding and datatype are all ones this function treats as supported. InvalidInputException is
// always re-thrown to the caller.
bool DataBatchEncryptionSequencer::DecodeAndEncrypt(tcb::span<const uint8_t> plaintext) {
    // Parameter validation also converts the encoding attributes used below.
    if (!ValidateParameters()) {
        return false;
    }

    // An empty payload is rejected up front.
    if (plaintext.empty()) {
        error_stage_ = "validation";
        error_message_ = "plaintext cannot be null or empty";
        return false;
    }

    const auto mode_key = GetEncryptionModeKey();

    /*
     * Why the try-catch exists:
     * - The end goal is per-value encryption for every case except (1) the BOOLEAN datatype and
     *   (2) RLE_DICTIONARY encoding.
     * - Until every compression, encoding, page type and datatype is implemented, unsupported
     *   combinations raise DBPSUnsupportedException, which is caught here and answered with
     *   per-block encryption instead.
     * - When per-value encryption is complete for all cases, this try-catch and the fallback call
     *   to EncryptBlock must be removed.
     */
    try {
        // Undo the page compression and separate level bytes from value bytes.
        auto [level_bytes, value_bytes, num_elements] = DecompressAndSplit(
            plaintext, compression_, encoding_attributes_converted_);

        // View the raw value bytes as a typed values buffer.
        auto typed_values = ReinterpretValueBytesAsTypedValuesBuffer(
            value_bytes, num_elements, datatype_, datatype_length_, encoding_);

        // Values are encrypted one by one; levels are encrypted as a single block.
        auto encrypted_values = encryptor_->EncryptValueList(typed_values);
        auto encrypted_levels = encryptor_->EncryptBlock(level_bytes);

        // Encrypted payloads compress poorly, so the joined bytes are stored without compression.
        encrypted_result_ = JoinWithLengthPrefix(encrypted_levels, encrypted_values);

        encryption_metadata_[mode_key] = ENCRYPTION_MODE_PER_VALUE;
        encryption_metadata_[DBPS_VERSION_KEY] = DBPS_VERSION;
        return true;
    } catch (const DBPSUnsupportedException&) {
        // Fallback to per-block encryption is allowed only for explicitly unsupported conditions.

        // Compression: only UNCOMPRESSED and SNAPPY are currently supported.
        const bool compression_ok = (compression_ == CompressionCodec::UNCOMPRESSED) ||
                                    (compression_ == CompressionCodec::SNAPPY);
        // Encoding: only PLAIN is currently supported. RLE_DICTIONARY can never be per-value
        // encrypted because `plaintext` then holds references to values, not the values.
        const bool encoding_ok = (encoding_ == Encoding::PLAIN);
        // Datatype: everything except BOOLEAN, which always uses per-block encryption.
        const bool datatype_ok = (datatype_ != Type::BOOLEAN);
        // Page type: all are supported (DATA_PAGE_V1, DATA_PAGE_V2, DICTIONARY_PAGE), so it never
        // forces the fallback.
        const bool fallback_allowed = !compression_ok || !encoding_ok || !datatype_ok;

        if (!fallback_allowed) {
            // Every condition is supported, so the exception is unexpected: pass it on.
            throw;
        }

        encrypted_result_ = encryptor_->EncryptBlock(plaintext);
        if (encrypted_result_.empty()) {
            error_stage_ = "encryption";
            error_message_ = "Failed to encrypt data";
            return false;
        }

        encryption_metadata_[mode_key] = ENCRYPTION_MODE_PER_BLOCK;
        encryption_metadata_[DBPS_VERSION_KEY] = DBPS_VERSION;
        return true;
    } catch (const InvalidInputException&) {
        // Treated like any other exception: handed back to the caller unchanged.
        throw;
    }
}
// Decrypts one page payload according to the encryption mode recorded in encryption_metadata_.
//
// Returns false, with error_stage_ and error_message_ set, when ValidateParameters() fails, when
// `ciphertext` is empty, when the version check fails, when no valid encryption mode is present,
// or when per-block decryption yields an empty result. On success the plaintext is stored in
// decrypted_result_.
bool DataBatchEncryptionSequencer::DecryptAndEncode(tcb::span<const uint8_t> ciphertext) {
    // Parameter validation also converts the encoding attributes used below.
    if (!ValidateParameters()) {
        return false;
    }

    // An empty payload is rejected up front.
    if (ciphertext.empty()) {
        error_stage_ = "validation";
        error_message_ = "ciphertext cannot be null or empty";
        return false;
    }

    // The metadata must carry an acceptable dbps_agent_version.
    const std::string version_problem = ValidateDecryptionVersion();
    if (!version_problem.empty()) {
        error_stage_ = "decrypt_version_check";
        error_message_ = version_problem;
        return false;
    }

    // The metadata must also name one of the known encryption modes.
    const auto mode_lookup = SafeGetEncryptionMode();
    if (!mode_lookup.has_value()) {
        error_stage_ = "decrypt_encryption_mode_validation";
        error_message_ = "Failed to get encryption_mode from encryption_metadata";
        return false;
    }
    const std::string& mode = *mode_lookup;

    if (mode == ENCRYPTION_MODE_PER_BLOCK) {
        // Per-block: the whole payload was encrypted as one block.
        decrypted_result_ = encryptor_->DecryptBlock(ciphertext);
        if (decrypted_result_.empty()) {
            error_stage_ = "decryption";
            error_message_ = "Failed to decrypt data";
            return false;
        }
        return true;
    }

    if (mode == ENCRYPTION_MODE_PER_VALUE) {
        // Per-value: the payload is the uncompressed, length-prefixed join of encrypted level
        // bytes and encrypted value bytes; each part is decrypted separately.
        auto [encrypted_levels, encrypted_values] = SplitWithLengthPrefix(ciphertext);
        auto level_bytes = encryptor_->DecryptBlock(encrypted_levels);
        auto typed_values = encryptor_->DecryptValueList(encrypted_values);

        // Turn the typed values back into raw value bytes.
        auto value_bytes = GetTypedValuesBufferAsValueBytes(std::move(typed_values));

        // Re-join levels and values and re-apply the page compression to obtain the plaintext.
        decrypted_result_ = CompressAndJoin(
            level_bytes, value_bytes, compression_, encoding_attributes_converted_);
    }

    return true;
}
// Helper methods for parameter validation and basic parameter reading.
bool DataBatchEncryptionSequencer::ConvertEncodingAttributesToValues() {
try {
auto add_str = [&](const std::string& key) {
return AddStringAttribute(encoding_attributes_converted_, encoding_attributes_, key);
};
auto add_int = [&](const std::string& key) {
return AddIntAttribute(encoding_attributes_converted_, encoding_attributes_, key);
};
auto add_bool = [&](const std::string& key) {
return AddBoolAttribute(encoding_attributes_converted_, encoding_attributes_, key);
};
std::string page_type = add_str("page_type");
// Convert common attributes for DATA_PAGE_V1 and DATA_PAGE_V2
if (page_type == "DATA_PAGE_V1" || page_type == "DATA_PAGE_V2") {
add_int("data_page_num_values");
add_int("data_page_max_definition_level");
add_int("data_page_max_repetition_level");
}
// Check specific attributes for each page type.
if (page_type == "DATA_PAGE_V1") {
add_str("page_v1_definition_level_encoding");
add_str("page_v1_repetition_level_encoding");
} else if (page_type == "DATA_PAGE_V2") {
add_int("page_v2_definition_levels_byte_length");
add_int("page_v2_repetition_levels_byte_length");
add_int("page_v2_num_nulls");
add_bool("page_v2_is_compressed");
} else if (page_type == "DICTIONARY_PAGE") {
add_int("dict_page_num_values");
} else {
throw InvalidInputException("Unexpected page type: " + page_type);
}
return true;
} catch (const InvalidInputException& e) {
error_stage_ = "encoding_attribute_conversion";
error_message_ = e.what();
return false;
}
}
bool DataBatchEncryptionSequencer::ValidateParameters() {
// Convert encoding attributes to typed values
if (!ConvertEncodingAttributesToValues()) {
return false;
}
// Check that key_id is not null and not empty
if (key_id_.empty()) {
error_stage_ = "validation";
error_message_ = "key_id cannot be null or empty";
return false;
}
// Check FIXED_LEN_BYTE_ARRAY datatype_length requirement
if (datatype_ == Type::FIXED_LEN_BYTE_ARRAY) {
if (!datatype_length_.has_value()) {
error_stage_ = "parameter_validation";
error_message_ = "FIXED_LEN_BYTE_ARRAY datatype requires datatype_length parameter";
return false;
}
if (datatype_length_.value() <= 0) {
error_stage_ = "parameter_validation";
error_message_ = "FIXED_LEN_BYTE_ARRAY datatype_length must be positive";
return false;
}
}
return true;
}
// Checks that encryption_metadata_ carries an acceptable agent version before decrypting.
//
// Returns an empty string when the check passes. Otherwise returns a description of the problem
// and also writes it to std::cerr.
std::string DataBatchEncryptionSequencer::ValidateDecryptionVersion() {
    const auto version_entry = encryption_metadata_.find(DBPS_VERSION_KEY);

    if (version_entry == encryption_metadata_.end()) {
        std::string problem =
            "encryption_metadata must contain key '" + std::string(DBPS_VERSION_KEY) + "'";
        std::cerr << "ERROR: EncryptionSequencer - " << problem << std::endl;
        return problem;
    }

    // The stored value is accepted when it starts with DBPS_VERSION (a prefix test, not equality).
    // NOTE(review): the message says "must match", yet a value with extra trailing characters
    // passes this check - confirm that prefix matching is the intent.
    const std::string& stored_version = version_entry->second;
    const bool starts_with_version = (stored_version.rfind(DBPS_VERSION, 0) == 0);
    if (!starts_with_version) {
        std::string problem = "encryption_metadata['" + std::string(DBPS_VERSION_KEY) +
                              "'] must match '" + std::string(DBPS_VERSION) + "'";
        std::cerr << "ERROR: EncryptionSequencer - " << problem
                  << ", but got '" << stored_version << "'" << std::endl;
        return problem;
    }

    return std::string();
}
// Selects the metadata key under which the encryption mode is stored for the current page type:
// the dictionary-page key for "DICTIONARY_PAGE", the data-page key for anything else.
// Reads "page_type" from encoding_attributes_converted_.
// NOTE(review): presumably throws if "page_type" is absent or is not a string (at()/std::get) -
// confirm against the container type declared in the header.
const char* DataBatchEncryptionSequencer::GetEncryptionModeKey() {
    const std::string page_type =
        std::get<std::string>(encoding_attributes_converted_.at("page_type"));
    if (page_type == "DICTIONARY_PAGE") {
        return ENCRYPTION_MODE_KEY_DICTIONARY_PAGE;
    }
    return ENCRYPTION_MODE_KEY_DATA_PAGE;
}
// Looks up the encryption mode in encryption_metadata_ under the key for the current page type.
// Returns the mode when it is present and is one of the two known values ("per_block" or
// "per_value"); returns std::nullopt when the key is missing or holds any other value.
std::optional<std::string> DataBatchEncryptionSequencer::SafeGetEncryptionMode() {
    const auto mode_entry = encryption_metadata_.find(GetEncryptionModeKey());
    if (mode_entry == encryption_metadata_.end()) {
        // No mode was recorded for this page type.
        return std::nullopt;
    }

    const std::string& mode = mode_entry->second;
    const bool is_known_mode =
        (mode == ENCRYPTION_MODE_PER_BLOCK) || (mode == ENCRYPTION_MODE_PER_VALUE);
    if (!is_known_mode) {
        // A mode was recorded, but it is not one this sequencer understands.
        return std::nullopt;
    }
    return mode;
}