From a3e4535f34bc87af709a008245d120bbb2619250 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Thu, 13 Jul 2023 15:10:57 +0200 Subject: [PATCH] LibJS: Resolve rope strings directly to UTF-16 when preferable When someone calls PrimitiveString::utf16_string() on a rope string, we know for sure that the client wants a UTF-16 string and may not be interested in a UTF-8 version at all. To avoid round-tripping through UTF-8 in this scenario, callers can now inform resolve_rope_if_needed() about their preferred encoding, should rope resolution take place. The UTF-16 case is actually a lot simpler than the UTF-8 case, since we can simply ask for UTF-16 data for each fiber of the rope, and then concatenate all the fibers. Since LibJS always uses UTF-16 for regular expression matching, this avoids round-tripping through UTF-8 whenever the input to a regex test is already UTF-16. :^) --- .../LibJS/Runtime/PrimitiveString.cpp | 41 +++++++++---------- .../Libraries/LibJS/Runtime/PrimitiveString.h | 6 ++- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp b/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp index f2ff244d96..15e1c373ca 100644 --- a/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp +++ b/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp @@ -77,7 +77,7 @@ bool PrimitiveString::is_empty() const ThrowCompletionOr PrimitiveString::utf8_string() const { auto& vm = this->vm(); - TRY(resolve_rope_if_needed()); + TRY(resolve_rope_if_needed(EncodingPreference::UTF8)); if (!has_utf8_string()) { if (has_deprecated_string()) @@ -99,7 +99,7 @@ ThrowCompletionOr PrimitiveString::utf8_string_view() const ThrowCompletionOr PrimitiveString::deprecated_string() const { - TRY(resolve_rope_if_needed()); + TRY(resolve_rope_if_needed(EncodingPreference::UTF8)); if (!has_deprecated_string()) { if (has_utf8_string()) @@ -115,7 +115,7 @@ ThrowCompletionOr PrimitiveString::deprecated_string() const ThrowCompletionOr PrimitiveString::utf16_string() const { - TRY(resolve_rope_if_needed()); + TRY(resolve_rope_if_needed(EncodingPreference::UTF16)); if (!has_utf16_string()) { if (has_utf8_string()) { @@ -245,31 +245,13 @@ NonnullGCPtr PrimitiveString::create(VM& vm, PrimitiveString& l return vm.heap().allocate_without_realm(lhs, rhs); } -ThrowCompletionOr PrimitiveString::resolve_rope_if_needed() const +ThrowCompletionOr PrimitiveString::resolve_rope_if_needed(EncodingPreference preference) const { if (!m_is_rope) return {}; auto& vm = this->vm(); - // NOTE: Special case for two concatenated UTF-16 strings. - // This is here as an optimization, although I'm unsure how valuable it is. - if (m_lhs->has_utf16_string() && m_rhs->has_utf16_string()) { - auto const& lhs_string = m_lhs->m_utf16_string.value(); - auto const& rhs_string = m_rhs->m_utf16_string.value(); - - Utf16Data combined; - TRY_OR_THROW_OOM(vm, combined.try_ensure_capacity(lhs_string.length_in_code_units() + rhs_string.length_in_code_units())); - combined.extend(lhs_string.string()); - combined.extend(rhs_string.string()); - - m_utf16_string = TRY(Utf16String::create(vm, move(combined))); - m_is_rope = false; - m_lhs = nullptr; - m_rhs = nullptr; - return {}; - } - // This vector will hold all the pieces of the rope that need to be assembled // into the resolved string. Vector pieces; @@ -289,6 +271,21 @@ ThrowCompletionOr PrimitiveString::resolve_rope_if_needed() const TRY_OR_THROW_OOM(vm, pieces.try_append(current)); } + if (preference == EncodingPreference::UTF16) { + // The caller wants a UTF-16 string, so we can simply concatenate all the pieces + // into a UTF-16 code unit buffer and create a Utf16String from it. + + Utf16Data code_units; + for (auto const* current : pieces) + code_units.extend(TRY(current->utf16_string()).string()); + + m_utf16_string = TRY(Utf16String::create(vm, move(code_units))); + m_is_rope = false; + m_lhs = nullptr; + m_rhs = nullptr; + return {}; + } + // Now that we have all the pieces, we can concatenate them using a StringBuilder. ThrowableStringBuilder builder(vm); diff --git a/Userland/Libraries/LibJS/Runtime/PrimitiveString.h b/Userland/Libraries/LibJS/Runtime/PrimitiveString.h index 886cf219a6..63822aa752 100644 --- a/Userland/Libraries/LibJS/Runtime/PrimitiveString.h +++ b/Userland/Libraries/LibJS/Runtime/PrimitiveString.h @@ -59,7 +59,11 @@ private: virtual void visit_edges(Cell::Visitor&) override; - ThrowCompletionOr resolve_rope_if_needed() const; + enum class EncodingPreference { + UTF8, + UTF16, + }; + ThrowCompletionOr resolve_rope_if_needed(EncodingPreference) const; mutable bool m_is_rope { false };