Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions source/common/http/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ envoy_cc_library(
envoy_cc_library(
name = "character_set_validation_lib",
hdrs = ["character_set_validation.h"],
deps = [
"@abseil-cpp//absl/strings:string_view",
],
)

envoy_cc_library(
Expand Down
113 changes: 72 additions & 41 deletions source/common/http/character_set_validation.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,71 @@
#include <array>
#include <cstdint>

#include "absl/strings/string_view.h"

// A set of tables for validating that a character is in a specific
// character set. Used to validate RFC compliance for various HTTP protocol elements.

namespace Envoy {
namespace Http {

inline constexpr bool testCharInTable(const std::array<uint32_t, 8>& table, char c) {
  // Membership test against a 256-entry bit table packed into 8 32-bit words
  // (small enough to stay CPU cache friendly). A set bit at index `c` means
  // the character `c` is allowed; a clear bit means it is not.
  //
  // Reinterpret `c` as an unsigned value so characters >= 0x80 index the upper
  // half of the table regardless of whether plain `char` is signed.
  const uint8_t index = static_cast<uint8_t>(c);
  // The top 3 bits of `index` select which of the 8 words holds the bit.
  const uint32_t word = table[index >> 5];
  // The low 5 bits of `index` select the bit inside that word, counted from
  // the most significant bit.
  const uint32_t bit = 0x80000000 >> (index & 0x1f);
  return (word & bit) != 0;
}
// A set of 8-bit characters stored as a 256-entry bit table packed into 8
// 32-bit words, supporting compile-time construction and set algebra.
//
// The bit for a character `c` (reinterpreted as an unsigned 8-bit value) lives
// in word `c >> 5`, at position `c & 0x1f` counted from the most significant
// bit. Character 0 is therefore the top bit of `table_[0]` and character 255
// is the lowest bit of `table_[7]`.
struct CharTable {
  const std::array<uint32_t, 8> table_;

  // Index of the word in `table_` that holds the bit for `c`.
  static inline constexpr uint32_t row(char c) { return static_cast<uint8_t>(c) >> 5; }
  // Single-bit mask selecting the bit for `c` within its word (MSB first).
  static inline constexpr uint32_t mask(char c) {
    return 0x80000000 >> (static_cast<uint8_t>(c) & 0x1f);
  }
  // Returns true if the bit for `c` is set in this table.
  inline constexpr bool hasChar(char c) const { return (table_[row(c)] & mask(c)) != 0; }
  // Sets the bit for `c` in a raw word array. Takes the array rather than a
  // CharTable because `table_` is const once a CharTable has been built.
  inline static constexpr void set(std::array<uint32_t, 8>& table, char c) {
    table[row(c)] |= mask(c);
  }
  // Builds a table containing exactly the characters that appear in `chars`.
  // Repeated characters are harmless.
  static inline constexpr CharTable fromChars(absl::string_view chars) {
    std::array<uint32_t, 8> table{};
    for (char c : chars) {
      set(table, c);
    }
    return {table};
  }
  // Set union: the result contains a character if either operand does.
  constexpr CharTable operator|(const CharTable& o) const {
    // Value-initialized because an uninitialized local is not permitted in a
    // constexpr function before C++20; every word is overwritten below.
    std::array<uint32_t, 8> table{};
    for (int i = 0; i < 8; i++) {
      table[i] = table_[i] | o.table_[i];
    }
    return {table};
  }
  // Set intersection: the result contains a character only if both operands do.
  constexpr CharTable operator&(const CharTable& o) const {
    std::array<uint32_t, 8> table{};
    for (int i = 0; i < 8; i++) {
      table[i] = table_[i] & o.table_[i];
    }
    return {table};
  }
  // Set complement over all 256 character values.
  constexpr CharTable operator~() const {
    std::array<uint32_t, 8> table{};
    for (int i = 0; i < 8; i++) {
      table[i] = ~table_[i];
    }
    return {table};
  }
};

namespace CharTables {
// Building-block character sets. Within each 32-bit word the most significant
// bit is the lowest character code of that word's 32-character range.
// Bits 65 (A) to 90 (Z)
static inline constexpr CharTable kUppercase{
{0, 0, 0b01111111111111111111111111100000, 0, 0, 0, 0, 0}};
// Bits 97 (a) to 122 (z)
static inline constexpr CharTable kLowercase{
{0, 0, 0, 0b01111111111111111111111111100000, 0, 0, 0, 0}};
// Bits 33 (!) to 126 (~). Space (32) and DEL (127) are excluded.
static inline constexpr CharTable kPrintable{{0, 0x7fffffff, 0xffffffff, 0xfffffffe, 0, 0, 0, 0}};
// Bits 128 to 255.
static inline constexpr CharTable kExtendedAscii{
{0, 0, 0, 0, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}};
// Bits 48 ('0') to 57 ('9')
static inline constexpr CharTable kDigits{
{0, 0b00000000000000001111111111000000, 0, 0, 0, 0, 0, 0}};
// Union of the three sets above: A-Z, a-z and 0-9.
static inline constexpr CharTable kAlphanumeric = kUppercase | kLowercase | kDigits;
// Header name character table.
// From RFC 9110, https://www.rfc-editor.org/rfc/rfc9110.html#section-5.1:
//
Expand All @@ -33,44 +80,28 @@ inline constexpr bool testCharInTable(const std::array<uint32_t, 8>& table, char
// / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
// / DIGIT / ALPHA
// SPELLCHECKER(on)
inline constexpr std::array<uint32_t, 8> kGenericHeaderNameCharTable = {
// control characters
0b00000000000000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b01011111001101101111111111000000,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b01111111111111111111111111100011,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b11111111111111111111111111101010,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
inline constexpr CharTable kGenericHeaderName =
kAlphanumeric | CharTable::fromChars("!#$%&'*+-.^_`|~");

// A URI query and fragment character table. From RFC 3986:
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.4
//
// SPELLCHECKER(off)
// query = *( pchar / "/" / "?" )
// fragment = *( pchar / "/" / "?" )
//
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
// pct-encoded = "%" HEXDIG HEXDIG
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
// SPELLCHECKER(on)
inline constexpr std::array<uint32_t, 8> kUriQueryAndFragmentCharTable = {
// control characters
0b00000000000000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b01001111111111111111111111110101,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b11111111111111111111111111100001,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b01111111111111111111111111100010,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
inline constexpr CharTable kUriQueryAndFragment =
kAlphanumeric | CharTable::fromChars("/?"
":@"
"-._~"
"%"
"!$&'()*+,;=");
} // namespace CharTables

} // namespace Http
} // namespace Envoy
2 changes: 1 addition & 1 deletion source/common/http/header_utility.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ bool HeaderUtility::headerNameIsValid(absl::string_view header_key) {
// TODO(yanavlasov): make validation in HTTP/2 case stricter.
bool is_valid = true;
for (auto iter = header_key.begin(); iter != header_key.end() && is_valid; ++iter) {
is_valid &= testCharInTable(kGenericHeaderNameCharTable, *iter);
is_valid &= CharTables::kGenericHeaderName.hasChar(*iter);
}
return is_valid;
}
Expand Down
42 changes: 9 additions & 33 deletions source/common/http/utility.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1324,41 +1324,17 @@ namespace {
// %-encode all ASCII character codepoints, EXCEPT:
// ALPHA | DIGIT | * | - | . | _
// SPACE is encoded as %20, NOT as the + character
constexpr std::array<uint32_t, 8> kUrlEncodedCharTable = {
// control characters
0b11111111111111111111111111111111,
// !"#$%&'()*+,-./0123456789:;<=>?
0b11111111110110010000000000111111,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b10000000000000000000000000011110,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b10000000000000000000000000011111,
// extended ascii
0b11111111111111111111111111111111,
0b11111111111111111111111111111111,
0b11111111111111111111111111111111,
0b11111111111111111111111111111111,
};
constexpr CharTable kUrlEncodedCharTable =
~(CharTables::kAlphanumeric | CharTable::fromChars("*-._"));

constexpr std::array<uint32_t, 8> kUrlDecodedCharTable = {
// control characters
0b00000000000000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b01011111111111111111111111110101,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b11111111111111111111111111110101,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b11111111111111111111111111100010,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
// The set of characters which, if they are percent-encoded, should be
// decoded.
constexpr CharTable kUrlDecodedCharTable =
CharTables::kAlphanumeric | CharTable::fromChars("!#$%&'()*+,-./:;=?@[]_`~");

bool shouldPercentEncodeChar(char c) { return testCharInTable(kUrlEncodedCharTable, c); }
constexpr bool shouldPercentEncodeChar(char c) { return kUrlEncodedCharTable.hasChar(c); }

bool shouldPercentDecodeChar(char c) { return testCharInTable(kUrlDecodedCharTable, c); }
constexpr bool shouldPercentDecodeChar(char c) { return kUrlDecodedCharTable.hasChar(c); }
} // namespace

std::string Utility::PercentEncoding::urlEncode(absl::string_view value) {
Expand Down Expand Up @@ -1671,7 +1647,7 @@ bool Utility::isValidRefererValue(absl::string_view value) {
seen_slash = true;
continue;
default:
if (!testCharInTable(kUriQueryAndFragmentCharTable, c)) {
if (!CharTables::kUriQueryAndFragment.hasChar(c)) {
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,9 @@ namespace EnvoyDefault {
// VCHAR = %x21-7E
// ; visible (printing) characters
// SPELLCHECKER(on)
inline constexpr std::array<uint32_t, 8> kGenericHeaderValueCharTable = {
// control characters
0b00000000010000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b11111111111111111111111111111111,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b11111111111111111111111111111111,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b11111111111111111111111111111110,
// extended ascii
0b11111111111111111111111111111111,
0b11111111111111111111111111111111,
0b11111111111111111111111111111111,
0b11111111111111111111111111111111,
};
inline constexpr ::Envoy::Http::CharTable kGenericHeaderValueCharTable =
::Envoy::Http::CharTables::kPrintable | ::Envoy::Http::CharTables::kExtendedAscii |
::Envoy::Http::CharTable::fromChars("\t ");

// :method header character table.
// From RFC 9110: https://www.rfc-editor.org/rfc/rfc9110.html#section-9.1
Expand All @@ -49,21 +37,9 @@ inline constexpr std::array<uint32_t, 8> kGenericHeaderValueCharTable = {
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "."
// / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
// SPELLCHECKER(on)
inline constexpr std::array<uint32_t, 8> kMethodHeaderCharTable = {
// control characters
0b00000000000000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b01011111001101101111111111000000,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b01111111111111111111111111100011,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b11111111111111111111111111101010,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
inline constexpr ::Envoy::Http::CharTable kMethodHeaderCharTable =
::Envoy::Http::CharTables::kAlphanumeric |
::Envoy::Http::CharTable::fromChars("!#$%&'*+-.^_`|~");

// :path header character table.
// From RFC 3986: https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
Expand All @@ -88,41 +64,16 @@ inline constexpr std::array<uint32_t, 8> kMethodHeaderCharTable = {
//
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// SPELLCHECKER(on)
inline constexpr std::array<uint32_t, 8> kPathHeaderCharTable = {
// control characters
0b00000000000000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b01001111111111111111111111110100,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b11111111111111111111111111100001,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b01111111111111111111111111100010,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
inline constexpr ::Envoy::Http::CharTable kPathHeaderCharTable =
::Envoy::Http::CharTables::kAlphanumeric |
::Envoy::Http::CharTable::fromChars("!$%&'()*+,-./:;=@_~");

// Unreserved characters.
// From RFC 3986: https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
//
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
inline constexpr std::array<uint32_t, 8> kUnreservedCharTable = {
// control characters
0b00000000000000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b00000000000001101111111111000000,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b01111111111111111111111111100001,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b01111111111111111111111111100010,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
inline constexpr ::Envoy::Http::CharTable kUnreservedCharTable =
::Envoy::Http::CharTables::kAlphanumeric | ::Envoy::Http::CharTable::fromChars("-._~");

// Transfer-Encoding HTTP/1.1 header character table.
// From RFC 9110: https://www.rfc-editor.org/rfc/rfc9110.html#section-10.1.4
Expand All @@ -132,62 +83,25 @@ inline constexpr std::array<uint32_t, 8> kUnreservedCharTable = {
// transfer-coding = token *( OWS ";" OWS transfer-parameter )
// transfer-parameter = token BWS "=" BWS ( token / quoted-string )
// SPELLCHECKER(on)
inline constexpr std::array<uint32_t, 8> kTransferEncodingHeaderCharTable = {
// control characters
0b00000000010000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b11111111001111101111111111010100,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b01111111111111111111111111100011,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b11111111111111111111111111101010,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
inline constexpr ::Envoy::Http::CharTable kTransferEncodingHeaderCharTable =
::Envoy::Http::CharTables::kAlphanumeric |
::Envoy::Http::CharTable::fromChars("\t !\"#$%&'*+,-.;=^_`|~");

// An IPv6 address, excluding the surrounding "[" and "]" characters. This is based on RFC 3986,
// https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2, that only allows hex digits and the
// ":" separator.
inline constexpr std::array<uint32_t, 8> kHostIPv6AddressCharTable = {
// control characters
0b00000000000000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b00000000000000001111111111100000,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b01111110000000000000000000000000,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b01111110000000000000000000000000,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
inline constexpr ::Envoy::Http::CharTable kHostIPv6AddressCharTable =
::Envoy::Http::CharTable::fromChars("0123456789:ABCDEFabcdef");

// A host reg-name character table, which covers both IPv4 addresses and hostnames.
// From RFC 3986: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2
//
// SPELLCHECKER(off)
// reg-name = *( unreserved / pct-encoded / sub-delims )
// SPELLCHECKER(on)
inline constexpr std::array<uint32_t, 8> kHostRegNameCharTable = {
// control characters
0b00000000000000000000000000000000,
// !"#$%&'()*+,-./0123456789:;<=>?
0b01001111111111101111111111010100,
//@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
0b01111111111111111111111111100001,
//`abcdefghijklmnopqrstuvwxyz{|}~
0b01111111111111111111111111100010,
// extended ascii
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
0b00000000000000000000000000000000,
};
inline constexpr ::Envoy::Http::CharTable kHostRegNameCharTable =
::Envoy::Http::CharTables::kAlphanumeric |
::Envoy::Http::CharTable::fromChars("!$%&'()*+,-.;=_~");

} // namespace EnvoyDefault
} // namespace HeaderValidators
Expand Down
Loading
Loading