Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions aui.core/src/AUI/Common/AChar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,33 @@

#include <AUI/Common/AStaticVector.h>

/// Keeps only the low 8 bits of @p value and hands them back as a plain `char`.
/// The value is narrowed through `unsigned char` first, then converted to `char`.
static char toByte(uint32_t value) {
    const auto lowByte = static_cast<unsigned char>(value);
    return static_cast<char>(lowByte);
}

AStaticVector<char, 4> AChar::toUtf8() const noexcept {
if (mValue <= 0x7F) {
return { static_cast<char>(mValue) };
return { toByte(mValue) };
}
if (mValue <= 0x7FF) {
return { static_cast<char>(0xC0 | (mValue >> 6)), static_cast<char>(0x80 | (mValue & 0x3F)) };
return { toByte(0xC0 | (mValue >> 6)), toByte(0x80 | (mValue & 0x3F)) };
}
if (mValue <= 0xFFFF) {
if (mValue >= 0xD800 && mValue <= 0xDFFF) {
return {}; // Invalid Unicode code point
return {};
}
return {
static_cast<char>(0xE0 | (mValue >> 12)),
static_cast<char>(0x80 | ((mValue >> 6) & 0x3F)),
static_cast<char>(0x80 | (mValue & 0x3F))
toByte(0xE0 | (mValue >> 12)),
toByte(0x80 | ((mValue >> 6) & 0x3F)),
toByte(0x80 | (mValue & 0x3F))
};
}
if (mValue <= 0x10FFFF) {
return {
static_cast<char>(0xF0 | (mValue >> 18)),
static_cast<char>(0x80 | ((mValue >> 12) & 0x3F)),
static_cast<char>(0x80 | ((mValue >> 6) & 0x3F)),
static_cast<char>(0x80 | (mValue & 0x3F))
toByte(0xF0 | (mValue >> 18)),
toByte(0x80 | ((mValue >> 12) & 0x3F)),
toByte(0x80 | ((mValue >> 6) & 0x3F)),
toByte(0x80 | (mValue & 0x3F))
};
}
return {}; // Invalid Unicode code point
Expand Down
268 changes: 1 addition & 267 deletions aui.core/src/AUI/Common/AString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,199 +16,6 @@
#include <AUI/Common/AByteBuffer.h>
#include <simdutf.h>

// utf8 stuff has a lot of magic
// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers,cppcoreguidelines-pro-bounds-pointer-arithmetic)

/// Reports how many bytes the sequence at the current byte position claims to
/// occupy, judged solely by its lead byte.
///
/// @return 0 when the iterator has no string or is at/after the end of it;
///         otherwise 1..4. Stray continuation bytes (0x80..0xBF) and invalid
///         lead bytes (0xF8..0xFF) are counted as a single byte.
size_t AUtf8MutableIterator::getCurrentCharByteLength() const noexcept {
    if (string_ == nullptr) {
        return 0;
    }
    if (byte_pos_ >= string_->size()) {
        return 0;
    }

    const char* bytes = string_->data();
    const auto lead = static_cast<unsigned char>(bytes[byte_pos_]);

    if (lead >= 0xF8) return 1;   // invalid lead byte, treated as one byte
    if (lead >= 0xF0) return 4;   // 11110xxx
    if (lead >= 0xE0) return 3;   // 1110xxxx
    if (lead >= 0xC0) return 2;   // 110xxxxx
    return 1;                     // 0xxxxxxx, or a stray continuation byte
}

/// Returns the number of bytes needed to store @p codepoint.
///
/// Values above U+10FFFF are reported as 3 bytes, the size of the replacement
/// character U+FFFD.
size_t AUtf8MutableIterator::getEncodedByteLength(char32_t codepoint) noexcept {
    if (codepoint > 0x10FFFF) {
        return 3;   // invalid: sized as the replacement character
    }
    if (codepoint > 0xFFFF) {
        return 4;
    }
    if (codepoint > 0x7FF) {
        return 3;
    }
    return codepoint > 0x7F ? 2 : 1;
}

/// Encodes @p codepoint as UTF-8 into @p buffer.
///
/// Code points that cannot be represented in well-formed UTF-8 — values above
/// U+10FFFF and the surrogate range U+D800..U+DFFF — are written as the
/// replacement character U+FFFD (EF BF BD). Both cases occupy 3 bytes, which
/// matches what getEncodedByteLength() reports for them.
///
/// @param codepoint code point to encode.
/// @param buffer    destination; must have room for at least 4 bytes.
/// @return number of bytes written (1..4).
size_t AUtf8MutableIterator::encodeUtf8(char32_t codepoint, char* buffer) noexcept {
    const bool isSurrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
    if (isSurrogate || codepoint > 0x10FFFF) {
        // Not encodable: emit U+FFFD instead of an ill-formed sequence.
        buffer[0] = static_cast<char>(0xEF);
        buffer[1] = static_cast<char>(0xBF);
        buffer[2] = static_cast<char>(0xBD);
        return 3;
    }

    if (codepoint <= 0x7F) {
        buffer[0] = static_cast<char>(codepoint);
        return 1;
    }
    if (codepoint <= 0x7FF) {
        buffer[0] = static_cast<char>(0xC0 | (codepoint >> 6));
        buffer[1] = static_cast<char>(0x80 | (codepoint & 0x3F));
        return 2;
    }
    if (codepoint <= 0xFFFF) {
        buffer[0] = static_cast<char>(0xE0 | (codepoint >> 12));
        buffer[1] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
        buffer[2] = static_cast<char>(0x80 | (codepoint & 0x3F));
        return 3;
    }

    // Remaining range: U+10000..U+10FFFF.
    buffer[0] = static_cast<char>(0xF0 | (codepoint >> 18));
    buffer[1] = static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
    buffer[2] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
    buffer[3] = static_cast<char>(0x80 | (codepoint & 0x3F));
    return 4;
}

AUtf8MutableIterator::AUtf8MutableIterator() noexcept
: string_(nullptr), byte_pos_(0) {}

/// Creates an iterator over @p str positioned at byte offset @p pos.
/// The pointer is stored as-is; no validation of @p pos is performed here.
AUtf8MutableIterator::AUtf8MutableIterator(AString* str, size_t pos) noexcept
: string_(str), byte_pos_(pos) {}

/// Decodes the character at the current byte position.
///
/// @return a default-constructed AChar when the iterator has no string or is
///         at/after the end; otherwise the result of decodeUtf8At().
const AChar AUtf8MutableIterator::operator*() const noexcept {
    const bool dereferenceable = string_ != nullptr && byte_pos_ < string_->size();
    if (!dereferenceable) {
        return AChar();
    }

    // decodeUtf8At() receives the position as an lvalue; hand it a scratch copy
    // so the iterator's own position is left untouched.
    size_t cursor = byte_pos_;
    return aui::utf8::detail::decodeUtf8At(string_->data(), cursor, string_->size());
}

/// Replaces the character at the current position with @p c, growing or
/// shrinking the underlying string when the encoded lengths differ.
///
/// Does nothing when the iterator has no string or is at/after the end.
/// The iterator keeps pointing at the same byte position, i.e. at the start of
/// the freshly written character.
///
/// @param c replacement character.
/// @return `*this`.
AUtf8MutableIterator& AUtf8MutableIterator::operator=(AChar c) {
    if (!string_ || byte_pos_ >= string_->size()) {
        return *this;
    }

    char utf8_buffer[4];
    const size_t new_len = encodeUtf8(c.codepoint(), utf8_buffer);

    // The lead byte may announce more bytes than the string actually holds
    // (truncated sequence at the very end). Clamp to what is really there,
    // otherwise the tail length below would underflow.
    size_t old_len = getCurrentCharByteLength();
    const size_t remaining = string_->size() - byte_pos_;
    if (old_len > remaining) {
        old_len = remaining;
    }

    const size_t old_size = string_->size();
    const size_t tail_start = byte_pos_ + old_len;
    const size_t tail_len = old_size - tail_start;

    if (new_len > old_len) {
        // Make room before shifting the tail to the right.
        string_->resize(old_size + (new_len - old_len));
    }

    if (new_len != old_len && tail_len > 0) {
        // memmove: source and destination ranges may overlap.
        std::memmove(string_->data() + byte_pos_ + new_len,
                     string_->data() + tail_start,
                     tail_len);
    }

    std::memcpy(string_->data() + byte_pos_, utf8_buffer, new_len);

    if (new_len < old_len) {
        // Tail has been shifted left; drop the now-unused bytes at the end.
        string_->resize(old_size - (old_len - new_len));
    }

    return *this;
}

/// Pre-increment: steps to the next character.
/// A detached iterator or one already at/after the end is left unchanged.
AUtf8MutableIterator& AUtf8MutableIterator::operator++() noexcept {
    const bool exhausted = !string_ || byte_pos_ >= string_->size();
    if (exhausted) {
        return *this;
    }

    // decodeUtf8At() is relied on to move the position past the decoded
    // character; the decoded value itself is not needed here.
    size_t cursor = byte_pos_;
    aui::utf8::detail::decodeUtf8At(string_->data(), cursor, string_->size());
    byte_pos_ = cursor;
    return *this;
}

/// Post-increment: advances the iterator and returns its previous state.
AUtf8MutableIterator AUtf8MutableIterator::operator++(int) noexcept {
    AUtf8MutableIterator before(*this);
    operator++();
    return before;
}

/// Pre-decrement: moves to the position reported by getPrevCharStart().
/// A detached iterator or one at byte position 0 is left unchanged.
AUtf8MutableIterator& AUtf8MutableIterator::operator--() noexcept {
    if (string_ == nullptr || byte_pos_ == 0) {
        return *this;
    }
    byte_pos_ = aui::utf8::detail::getPrevCharStart(string_->data(), byte_pos_);
    return *this;
}

/// Post-decrement: steps the iterator back and returns its previous state.
AUtf8MutableIterator AUtf8MutableIterator::operator--(int) noexcept {
    AUtf8MutableIterator before(*this);
    operator--();
    return before;
}

/// Moves the iterator by @p n characters: forward for positive @p n, backward
/// for negative @p n. Movement stops early at the end (forward) or at byte
/// position 0 (backward). A detached iterator is left unchanged.
AUtf8MutableIterator& AUtf8MutableIterator::operator+=(int n) noexcept {
    if (!string_) {
        return *this;
    }

    // At most one of the two loops runs, depending on the sign of n.
    for (int left = n; left > 0 && byte_pos_ < string_->size(); --left) {
        operator++();
    }
    for (int left = n; left < 0 && byte_pos_ > 0; ++left) {
        operator--();
    }
    return *this;
}

/// Two iterators are equal when they refer to the same string object and the
/// same byte position.
bool AUtf8MutableIterator::operator==(const AUtf8MutableIterator& other) const noexcept {
    if (string_ != other.string_) {
        return false;
    }
    return byte_pos_ == other.byte_pos_;
}

/// Negation of operator==.
bool AUtf8MutableIterator::operator!=(const AUtf8MutableIterator& other) const noexcept {
    const bool same = operator==(other);
    return !same;
}

size_t AUtf8MutableIterator::getBytePos() const noexcept {
return byte_pos_;
}

/// Returns the string this iterator is attached to (may be null).
AString* AUtf8MutableIterator::getString() const noexcept {
    return this->string_;
}

/// Converts to a read-only iterator at the same byte position.
/// A detached iterator converts to a default-constructed AUtf8ConstIterator.
AUtf8MutableIterator::operator AUtf8ConstIterator() const noexcept {
    if (string_ == nullptr) {
        return AUtf8ConstIterator();
    }

    auto* const first = string_->data();
    auto* const last = first + string_->size();
    return AUtf8ConstIterator(first, first, last, byte_pos_);
}

/// Distance between two iterators, computed by converting both to
/// AUtf8ConstIterator and subtracting those.
auto AUtf8MutableIterator::operator-(const AUtf8MutableIterator& other) const noexcept -> difference_type {
    auto lhs = operator AUtf8ConstIterator();
    auto rhs = other.operator AUtf8ConstIterator();
    return lhs - rhs;
}

AString AString::numberHex(int i) {
char buf[32];
std::snprintf(buf, sizeof(buf), "%x", static_cast<unsigned>(i));
Expand Down Expand Up @@ -293,8 +100,6 @@ AString::AString(std::span<const std::byte> bytes, AStringEncoding encoding) : A

/// Builds the string from the base-class iterator range [begin, end).
AString::AString(super::const_iterator begin, super::const_iterator end)
    : super(begin, end) {
}

/// Builds the string from the AString::const_iterator range [begin, end) by
/// forwarding both iterators to the base-class constructor.
AString::AString(const_iterator begin, const_iterator end)
    : super(begin, end) {
}

AString::AString(const char* utf8_bytes, size_type length) {
if (simdutf::validate_utf8(utf8_bytes, length)) {
*this = std::string(utf8_bytes, length);
Expand Down Expand Up @@ -337,10 +142,6 @@ AString::AString(size_type n, AChar c) {
}
}

/// Appends the character @p c to the end of the string.
// NOTE(review): this is noexcept but forwards to append(AChar); if that call can
// throw (e.g. on allocation failure) the program would terminate — confirm intended.
void AString::push_back(AChar c) noexcept {
    this->append(c);
}

void AString::insert(size_type pos, AChar c) {
auto utf8c = c.toUtf8();
bytes().insert(bytes().begin() + aui::utf8::detail::findUnicodePos(bytes(), pos).valueOr(0), utf8c.begin(), utf8c.end());
Expand Down Expand Up @@ -388,21 +189,9 @@ AString& AString::append(char c) {
return *this;
}

/// Appends the UTF-8 encoding of @p c to the string.
/// @return `*this`, to allow chaining.
AString& AString::append(AChar c) {
    auto encoded = c.toUtf8();
    auto first = encoded.begin();
    auto last = encoded.end();
    super::append(first, last);
    return *this;
}


/// Replaces every occurrence of the character @p from with @p to, in place.
///
/// @param from character to look for.
/// @param to   character written in its place.
/// @return `*this`, to allow chaining.
AString& AString::replaceAll(AChar from, AChar to) {
    if (empty()) {
        return *this;
    }

    // end() is re-evaluated every iteration on purpose: assigning through the
    // iterator may change the string's byte length.
    for (auto it = begin(); it != end(); ++it) {
        if (*it == from) {
            it = to;
        }
    }
    return *this;
}

AString& AString::replaceAll(AStringView from, AStringView to) {
Expand Down Expand Up @@ -522,61 +311,6 @@ void AString::resizeToNullTerminator() {
resize(end - current);
}

/// Tells whether the string begins with the character @p prefix.
///
/// @return false when @p prefix has no UTF-8 encoding (toUtf8() yields an empty
///         sequence); otherwise the result of the AStringView overload.
bool AString::startsWith(AChar prefix) const noexcept {
    auto utf8p = prefix.toUtf8();
    if (utf8p.size() == 0) {
        // An unencodable character would turn into an empty view, which every
        // string "starts with"; report a mismatch instead.
        return false;
    }
    return startsWith(AStringView(utf8p.data(), utf8p.size()));
}

/// Tells whether the string ends with the character @p suffix.
///
/// @return false when @p suffix has no UTF-8 encoding (toUtf8() yields an empty
///         sequence); otherwise the result of the AStringView overload.
bool AString::endsWith(AChar suffix) const noexcept {
    auto utf8s = suffix.toUtf8();
    if (utf8s.size() == 0) {
        // An unencodable character would turn into an empty view, which every
        // string "ends with"; report a mismatch instead.
        return false;
    }
    return endsWith(AStringView(utf8s.data(), utf8s.size()));
}

/// Removes the single character @p it points at.
///
/// @return end() when @p it equals cend(); otherwise an iterator at the byte
///         position where the removed character used to start.
auto AString::erase(const_iterator it) -> iterator {
    if (it == cend()) {
        return end();
    }

    const size_type first = it.getBytePos();

    // decodeUtf8At() is relied on to move the position past the character, which
    // gives the byte span to remove.
    size_type past = first;
    aui::utf8::detail::decodeUtf8At(data(), past, size());

    super::erase(first, past - first);
    return iterator(this, first);
}

/// Removes the characters in the range [begin, end).
///
/// @return an iterator at the byte position where the removed range started.
///         When @p begin equals cend() nothing is removed and the iterator is
///         placed at size(); an empty or reversed range also removes nothing.
auto AString::erase(const_iterator begin, const_iterator end) -> iterator {
    if (begin == cend()) {
        return iterator(this, size());
    }

    const size_type first = begin.getBytePos();
    if (begin == end) {
        return iterator(this, first);
    }

    const size_type last = end.getBytePos();
    if (first >= last) {
        return iterator(this, first);
    }

    super::erase(first, last - first);
    return iterator(this, first);
}

/// Removes @p u_count characters starting at character index @p u_pos.
///
/// @param u_pos   index of the first character to remove.
/// @param u_count number of characters to remove.
void AString::erase(size_t u_pos, size_t u_count) {
    // Locate the start once and continue from there for the end, instead of
    // walking from begin() to u_pos twice.
    auto first = begin() + u_pos;
    erase(first, first + u_count);
}

/// Splits the string on the character @p c by delegating to the view's split().
AStringVector AString::split(AChar c) const {
    auto whole = view();
    return whole.split(c);
}

// NOLINTEND(cppcoreguidelines-avoid-magic-numbers,cppcoreguidelines-pro-bounds-pointer-arithmetic)
Loading
Loading