-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathCompressionDictionaryBuilder.hpp
More file actions
245 lines (216 loc) · 7.64 KB
/
Copy pathCompressionDictionaryBuilder.hpp
File metadata and controls
245 lines (216 loc) · 7.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_BUILDER_HPP_
#define QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_BUILDER_HPP_
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "utility/Macros.hpp"
#include "glog/logging.h"
namespace quickstep {
/** \addtogroup Compression
* @{
*/
/**
* @brief An object which accumulates typed values and builds a physical
* dictionary for a CompressionDictionary object. Usable with both
* fixed-length and variable-length types.
**/
class CompressionDictionaryBuilder {
public:
/**
* @brief Constructor.
*
* @param type The Type to build a CompressionDictionary for. Must be
* orderable by LessComparison.
**/
explicit CompressionDictionaryBuilder(const Type &type);
/**
* @brief Destructor.
**/
~CompressionDictionaryBuilder() {
}
/**
* @brief Get the number of entries (unique values/codes) in the dictionary
* being built.
*
* @return The number of entries in the dictionary.
**/
inline std::uint32_t numberOfEntries() const {
return num_values_;
}
/**
* @brief Get the number of bits needed to represent a code in the dictionary
* being built.
*
* @return The length, in bits, of codes for the dictionary.
**/
inline std::uint8_t codeLengthBits() const {
return code_length_bits_;
}
/**
* @brief Get the number of bytes used to represent a code in the dictionary
* being build when all codes are padded up to the next power-of-two
* number of bytes.
*
* @return The length, in bytes, of codes padded up to a power-of-two bytes.
**/
inline std::uint8_t codeLengthPaddedBytes() const {
if (code_length_bits_ < 9) {
return 1;
} else if (code_length_bits_ < 17) {
return 2;
} else {
return 4;
}
}
/**
* @brief Get the number of bytes needed to store the physical dictionary
* being built.
*
* @return The size, in bytes, of the dictionary.
**/
inline std::size_t dictionarySizeBytes() const {
if (fixed_type_length_) {
return 2 * sizeof(std::uint32_t) + total_value_size_;
} else {
return (num_values_ + 2 - null_value_present_) * sizeof(std::uint32_t)
+ total_value_size_;
}
}
/**
* @brief Determine if the dictionary being built contains a particular
* value.
**/
bool containsValue(const TypedValue &value) const {
DCHECK(value.isPlausibleInstanceOf(type_.getSignature()));
if (value.isNull()) {
return null_value_present_;
}
return value_set_.find(value) != value_set_.end();
}
/**
* @brief Lookup the code for a particular value in a dictionary that has
* been built by this CompressionDictionaryBuilder.
* @note This method uses a hash-table lookup for O(1) constant time
* performance in the general case, in contrast to the O(log(n))
* lookup performance of CompressionDictionary::getCodeForTypedValue().
* @warning Only call this AFTER buildDictionary().
*
* @param value A previously-inserted value to look up.
* @return The code that maps to value.
**/
inline std::uint32_t getCodeForValue(const TypedValue &value) const {
DCHECK(value.isPlausibleInstanceOf(type_.getSignature()));
DCHECK(containsValue(value));
DCHECK(built_);
if (value.isNull()) {
return num_values_ - 1;
}
return value_set_.find(value)->second;
}
/**
* @brief Construct a physical dictionary in the specified memory location.
*
* @param location The memory location where the physical dictionary should
* be built. Must have dictionarySizeBytes() available to write at
* location.
**/
void buildDictionary(void *location);
/**
* @brief Add a value to the dictionary being built.
* @note This method makes a copy of the value passed in. If the caller can
* guarantee that value remains in existence for the life of this
* CompressionDictionaryBuilder, it is more memory-efficient to use
* insertEntryByReference() instead.
*
* @param value A typed value to add to the dictionary.
* @return True if value has been added, false if it was already present and
* the dictionary was not modified.
**/
bool insertEntry(const TypedValue &value) {
DCHECK(!built_);
// This is used in buildDictionary() to avoid an intermediate copy of
// out-of-line data.
may_contain_out_of_line_literal_ = true;
return insertEntryInternal(value, false);
}
/**
* @brief Add a value to the dictionary being built without copying it.
* @warning The caller must ensure that value is not deleted until after
* done using this CompressionDictionaryBuilder.
*
* @param value A typed value to add to the dictionary.
* @return True if value has been added, false if it was already present and
* the dictionary was not modified.
**/
bool insertEntryByReference(const TypedValue &value) {
DCHECK(!built_);
return insertEntryInternal(value, true);
}
/**
* @brief Remove the last entry successfully added to the dictionary via
* insertEntry(), reducing the dictionary size and potentially
* reducing the code length in bits.
**/
void undoLastInsert();
private:
struct TypedValueHasher {
inline std::size_t operator()(const TypedValue &value) const {
return value.getHash();
}
};
struct TypedValueEqualChecker {
inline bool operator()(const TypedValue &left, const TypedValue &right) const {
return left.fastEqualCheck(right);
}
};
bool insertEntryInternal(const TypedValue &value, bool by_reference);
const Type &type_;
const bool type_is_nullable_;
std::size_t fixed_type_length_; // 0 for variable-length types.
// Initially, the uint32_t value associated with each key is simply the max
// for uint32_t. buildDictionary() updates values to be the actual compressed
// codes for each value so that getCodeForValue() can do fast lookups.
std::unordered_map<TypedValue,
std::uint32_t,
TypedValueHasher,
TypedValueEqualChecker>
value_set_;
std::unordered_map<TypedValue,
std::uint32_t,
TypedValueHasher,
TypedValueEqualChecker>::iterator
last_inserted_iterator_;
std::uint32_t num_values_;
std::uint8_t code_length_bits_;
std::size_t total_value_size_;
bool null_value_present_;
bool last_insert_was_null_;
bool may_contain_out_of_line_literal_;
// Keeps track of whether buildDictionary() has been called for
// error-checking purposes.
bool built_;
DISALLOW_COPY_AND_ASSIGN(CompressionDictionaryBuilder);
};
/** @} */
} // namespace quickstep
#endif // QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_BUILDER_HPP_