// Patch summary: moves the storage and the copy/move special members of AK::String
// into a new base class, AK::Detail::StringBase.
//
//  * AK/CMakeLists.txt  - adds StringBase.cpp to AK_SOURCES.
//  * AK/String.cpp      - removes String's NonnullRefPtr constructor, copy/move constructors,
//                         copy/move assignment operators, String::is_short_string(), and the
//                         String::ShortString::bytes()/byte_count() definitions.
//  * AK/String.h        - String now derives publicly from Detail::StringBase, inherits its
//                         constructors (`using StringBase::StringBase`), and uses
//                         AK_MAKE_DEFAULT_COPYABLE / AK_MAKE_DEFAULT_MOVABLE instead of declaring
//                         its own copy/move members. The default constructor now delegates to
//                         StringBase(ShortString { SHORT_STRING_FLAG, {} }). The nested
//                         ShortString struct, SHORT_STRING_FLAG, has_short_string_bit(), the
//                         private constructors, the m_short_string/m_data union and the forward
//                         declaration of Detail::StringData are removed; ShortString becomes an
//                         alias for Detail::ShortString and MAX_SHORT_STRING_BYTE_COUNT forwards
//                         to Detail::MAX_SHORT_STRING_BYTE_COUNT.
//  * AK/StringBase.cpp  - new file: ShortString::bytes()/byte_count(), the NonnullRefPtr
//                         constructor, the copy constructor, both assignment operators and
//                         is_short_string(), with the same bodies that String.cpp loses.
//  * AK/StringBase.h    - new file: Detail::MAX_SHORT_STRING_BYTE_COUNT, Detail::ShortString and
//                         Detail::StringBase. The move constructor is now constexpr and inline:
//                         it copies other.m_short_string, then resets `other` to a
//                         value-initialized ShortString with only SHORT_STRING_FLAG set.
//                         Three static_asserts are added: HostIsLittleEndian,
//                         sizeof(ShortString) >= sizeof(StringData*), and
//                         byte_count_and_short_string_flag at offset 0.
//
// NOTE(review): StringBase::operator=(StringBase&&) has no `&other != this` check, while the copy
// assignment does. This matches the String::operator=(String&&) being removed - confirm that
// self-move-assignment is acceptable for callers.
// NOTE(review): this copy of the patch has its line breaks collapsed, and text inside angle
// brackets (the #include targets, template arguments such as the NonnullRefPtr / reinterpret_cast /
// Traits / Badge parameters) appears to be missing. It presumably will not apply as-is - TODO
// regenerate it from the original commit before use.
diff --git a/AK/CMakeLists.txt b/AK/CMakeLists.txt index 15ebea36de..235a549833 100644 --- a/AK/CMakeLists.txt +++ b/AK/CMakeLists.txt @@ -28,6 +28,7 @@ set(AK_SOURCES StackInfo.cpp Stream.cpp String.cpp + StringBase.cpp StringBuilder.cpp StringFloatingPointConversions.cpp StringImpl.cpp diff --git a/AK/String.cpp b/AK/String.cpp index 8286084063..d44e3949c6 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -122,49 +122,6 @@ void StringData::compute_hash() const } -String::String(NonnullRefPtr data) - : m_data(&data.leak_ref()) -{ -} - -String::String(String const& other) - : m_data(other.m_data) -{ - if (!is_short_string()) - m_data->ref(); -} - -String::String(String&& other) - : m_data(exchange(other.m_data, nullptr)) -{ - other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG; -} - -String& String::operator=(String&& other) -{ - if (!is_short_string()) - m_data->unref(); - - m_data = exchange(other.m_data, nullptr); - other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG; - return *this; -} - -String& String::operator=(String const& other) -{ - if (&other != this) { - if (!is_short_string()) - m_data->unref(); - - m_data = other.m_data; - - if (!is_short_string()) - m_data->ref(); - } - - return *this; -} - void String::destroy_string() { if (!is_short_string()) @@ -491,21 +448,6 @@ bool String::ends_with_bytes(StringView bytes, CaseSensitivity case_sensitivity) return bytes_as_string_view().ends_with(bytes, case_sensitivity); } -bool String::is_short_string() const -{ - return has_short_string_bit(reinterpret_cast(m_data)); -} - -ReadonlyBytes String::ShortString::bytes() const -{ - return { storage, byte_count() }; -} - -size_t String::ShortString::byte_count() const -{ - return byte_count_and_short_string_flag >> 1; -} - unsigned Traits::hash(String const& string) { return string.hash(); diff --git a/AK/String.h b/AK/String.h index 4d2a634b53..1f25f663d8 100644 --- a/AK/String.h +++ b/AK/String.h @@ -14,6 +14,7 @@ 
#include #include #include +#include #include #include #include @@ -25,10 +26,6 @@ namespace AK { -namespace Detail { -class StringData; -} - // FIXME: Remove this when OpenBSD Clang fully supports consteval. // And once oss-fuzz updates to clang >15. // And once Android ships an NDK with clang >14 @@ -41,16 +38,15 @@ class StringData; // String is a strongly owned sequence of Unicode code points encoded as UTF-8. // The data may or may not be heap-allocated, and may or may not be reference counted. // There is no guarantee that the underlying bytes are null-terminated. -class String { +class String : public Detail::StringBase { + AK_MAKE_DEFAULT_COPYABLE(String); + AK_MAKE_DEFAULT_MOVABLE(String); + public: // NOTE: For short strings, we avoid heap allocations by storing them in the data pointer slot. - static constexpr size_t MAX_SHORT_STRING_BYTE_COUNT = sizeof(Detail::StringData*) - 1; + static constexpr size_t MAX_SHORT_STRING_BYTE_COUNT = Detail::MAX_SHORT_STRING_BYTE_COUNT; - String(String const&); - String(String&&); - - String& operator=(String&&); - String& operator=(String const&); + using StringBase::StringBase; constexpr ~String() { @@ -60,7 +56,7 @@ public: // Creates an empty (zero-length) String. constexpr String() - : String(ShortString { SHORT_STRING_FLAG, {} }) + : StringBase(ShortString { SHORT_STRING_FLAG, {} }) { } @@ -199,9 +195,6 @@ public: return builder.to_string(); } - // NOTE: This is primarily interesting to unit tests. - [[nodiscard]] bool is_short_string() const; - [[nodiscard]] static String fly_string_data_to_string(Badge, uintptr_t const&); [[nodiscard]] static StringView fly_string_data_to_string_view(Badge, uintptr_t const&); [[nodiscard]] static u32 fly_string_data_to_hash(Badge, uintptr_t const&); @@ -219,36 +212,9 @@ public: static ErrorOr from_byte_string(T&&) = delete; private: - // NOTE: If the least significant bit of the pointer is set, this is a short string. 
- static constexpr uintptr_t SHORT_STRING_FLAG = 1; - - static constexpr bool has_short_string_bit(uintptr_t data) - { - return (data & SHORT_STRING_FLAG) != 0; - } - - struct ShortString { - ReadonlyBytes bytes() const; - size_t byte_count() const; - - // NOTE: This is the byte count shifted left 1 step and or'ed with a 1 (the SHORT_STRING_FLAG) - u8 byte_count_and_short_string_flag { 0 }; - u8 storage[MAX_SHORT_STRING_BYTE_COUNT] = { 0 }; - }; - - explicit String(NonnullRefPtr); - - explicit constexpr String(ShortString short_string) - : m_short_string(short_string) - { - } + using ShortString = Detail::ShortString; void destroy_string(); - - union { - ShortString m_short_string; - Detail::StringData const* m_data { nullptr }; - }; }; template<> diff --git a/AK/StringBase.cpp b/AK/StringBase.cpp new file mode 100644 index 0000000000..0f04bf3202 --- /dev/null +++ b/AK/StringBase.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023, Dan Klishch + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace AK::Detail { + +ReadonlyBytes ShortString::bytes() const +{ + return { storage, byte_count() }; +} + +size_t ShortString::byte_count() const +{ + return byte_count_and_short_string_flag >> 1; +} + +StringBase::StringBase(NonnullRefPtr data) + : m_data(&data.leak_ref()) +{ +} + +StringBase::StringBase(StringBase const& other) + : m_data(other.m_data) +{ + if (!is_short_string()) + m_data->ref(); +} + +StringBase& StringBase::operator=(StringBase&& other) +{ + if (!is_short_string()) + m_data->unref(); + + m_data = exchange(other.m_data, nullptr); + other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG; + return *this; +} + +StringBase& StringBase::operator=(StringBase const& other) +{ + if (&other != this) { + if (!is_short_string()) + m_data->unref(); + + m_data = other.m_data; + if (!is_short_string()) + m_data->ref(); + } + return *this; +} + +bool StringBase::is_short_string() const +{ + return 
has_short_string_bit(reinterpret_cast(m_data)); +} + +} diff --git a/AK/StringBase.h b/AK/StringBase.h new file mode 100644 index 0000000000..bdceeac351 --- /dev/null +++ b/AK/StringBase.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2023, Dan Klishch + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include + +namespace AK::Detail { + +class StringData; + +static constexpr size_t MAX_SHORT_STRING_BYTE_COUNT = sizeof(StringData*) - 1; + +struct ShortString { + ReadonlyBytes bytes() const; + size_t byte_count() const; + + // NOTE: This is the byte count shifted left 1 step and or'ed with a 1 (the SHORT_STRING_FLAG) + u8 byte_count_and_short_string_flag { 0 }; + u8 storage[MAX_SHORT_STRING_BYTE_COUNT] = { 0 }; +}; + +static_assert(HostIsLittleEndian, "Order of fields in ShortString assumes LE."); +static_assert(sizeof(ShortString) >= sizeof(StringData*)); +static_assert(__builtin_offsetof(ShortString, byte_count_and_short_string_flag) == 0); + +class StringBase { +public: + StringBase(StringBase const&); + + constexpr StringBase(StringBase&& other) + : m_short_string(other.m_short_string) + { + other.m_short_string = ShortString {}; + other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG; + } + + StringBase& operator=(StringBase&&); + StringBase& operator=(StringBase const&); + + // NOTE: This is primarily interesting to unit tests. + [[nodiscard]] bool is_short_string() const; + +protected: + // NOTE: If the least significant bit of the pointer is set, this is a short string. + static constexpr uintptr_t SHORT_STRING_FLAG = 1; + + static constexpr bool has_short_string_bit(uintptr_t data) + { + return (data & SHORT_STRING_FLAG) != 0; + } + + explicit StringBase(NonnullRefPtr); + + explicit constexpr StringBase(ShortString short_string) + : m_short_string(short_string) + { + } + + union { + ShortString m_short_string; + Detail::StringData const* m_data { nullptr }; + }; +}; + +}