mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 18:47:44 +00:00
LibJS: Resolve rope strings directly to UTF-16 when preferable
When someone calls PrimitiveString::utf16_string() on a rope string, we know for sure that the client wants a UTF-16 string and may not be interested in a UTF-8 version at all.

To avoid round-tripping through UTF-8 in this scenario, callers can now inform resolve_rope_if_needed() about their preferred encoding, should rope resolution take place. The UTF-16 case is actually a lot simpler than the UTF-8 case, since we can simply ask for UTF-16 data for each fiber of the rope, and then concatenate all the fibers.

Since LibJS always uses UTF-16 for regular expression matching, this avoids round-tripping through UTF-8 whenever the input to a regex test is already UTF-16. :^)
This commit is contained in:
parent
e78ea08ed9
commit
a3e4535f34
2 changed files with 24 additions and 23 deletions
|
@ -77,7 +77,7 @@ bool PrimitiveString::is_empty() const
|
||||||
ThrowCompletionOr<String> PrimitiveString::utf8_string() const
|
ThrowCompletionOr<String> PrimitiveString::utf8_string() const
|
||||||
{
|
{
|
||||||
auto& vm = this->vm();
|
auto& vm = this->vm();
|
||||||
TRY(resolve_rope_if_needed());
|
TRY(resolve_rope_if_needed(EncodingPreference::UTF8));
|
||||||
|
|
||||||
if (!has_utf8_string()) {
|
if (!has_utf8_string()) {
|
||||||
if (has_deprecated_string())
|
if (has_deprecated_string())
|
||||||
|
@ -99,7 +99,7 @@ ThrowCompletionOr<StringView> PrimitiveString::utf8_string_view() const
|
||||||
|
|
||||||
ThrowCompletionOr<DeprecatedString> PrimitiveString::deprecated_string() const
|
ThrowCompletionOr<DeprecatedString> PrimitiveString::deprecated_string() const
|
||||||
{
|
{
|
||||||
TRY(resolve_rope_if_needed());
|
TRY(resolve_rope_if_needed(EncodingPreference::UTF8));
|
||||||
|
|
||||||
if (!has_deprecated_string()) {
|
if (!has_deprecated_string()) {
|
||||||
if (has_utf8_string())
|
if (has_utf8_string())
|
||||||
|
@ -115,7 +115,7 @@ ThrowCompletionOr<DeprecatedString> PrimitiveString::deprecated_string() const
|
||||||
|
|
||||||
ThrowCompletionOr<Utf16String> PrimitiveString::utf16_string() const
|
ThrowCompletionOr<Utf16String> PrimitiveString::utf16_string() const
|
||||||
{
|
{
|
||||||
TRY(resolve_rope_if_needed());
|
TRY(resolve_rope_if_needed(EncodingPreference::UTF16));
|
||||||
|
|
||||||
if (!has_utf16_string()) {
|
if (!has_utf16_string()) {
|
||||||
if (has_utf8_string()) {
|
if (has_utf8_string()) {
|
||||||
|
@ -245,31 +245,13 @@ NonnullGCPtr<PrimitiveString> PrimitiveString::create(VM& vm, PrimitiveString& l
|
||||||
return vm.heap().allocate_without_realm<PrimitiveString>(lhs, rhs);
|
return vm.heap().allocate_without_realm<PrimitiveString>(lhs, rhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed() const
|
ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed(EncodingPreference preference) const
|
||||||
{
|
{
|
||||||
if (!m_is_rope)
|
if (!m_is_rope)
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
auto& vm = this->vm();
|
auto& vm = this->vm();
|
||||||
|
|
||||||
// NOTE: Special case for two concatenated UTF-16 strings.
|
|
||||||
// This is here as an optimization, although I'm unsure how valuable it is.
|
|
||||||
if (m_lhs->has_utf16_string() && m_rhs->has_utf16_string()) {
|
|
||||||
auto const& lhs_string = m_lhs->m_utf16_string.value();
|
|
||||||
auto const& rhs_string = m_rhs->m_utf16_string.value();
|
|
||||||
|
|
||||||
Utf16Data combined;
|
|
||||||
TRY_OR_THROW_OOM(vm, combined.try_ensure_capacity(lhs_string.length_in_code_units() + rhs_string.length_in_code_units()));
|
|
||||||
combined.extend(lhs_string.string());
|
|
||||||
combined.extend(rhs_string.string());
|
|
||||||
|
|
||||||
m_utf16_string = TRY(Utf16String::create(vm, move(combined)));
|
|
||||||
m_is_rope = false;
|
|
||||||
m_lhs = nullptr;
|
|
||||||
m_rhs = nullptr;
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
// This vector will hold all the pieces of the rope that need to be assembled
|
// This vector will hold all the pieces of the rope that need to be assembled
|
||||||
// into the resolved string.
|
// into the resolved string.
|
||||||
Vector<PrimitiveString const*> pieces;
|
Vector<PrimitiveString const*> pieces;
|
||||||
|
@ -289,6 +271,21 @@ ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed() const
|
||||||
TRY_OR_THROW_OOM(vm, pieces.try_append(current));
|
TRY_OR_THROW_OOM(vm, pieces.try_append(current));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (preference == EncodingPreference::UTF16) {
|
||||||
|
// The caller wants a UTF-16 string, so we can simply concatenate all the pieces
|
||||||
|
// into a UTF-16 code unit buffer and create a Utf16String from it.
|
||||||
|
|
||||||
|
Utf16Data code_units;
|
||||||
|
for (auto const* current : pieces)
|
||||||
|
code_units.extend(TRY(current->utf16_string()).string());
|
||||||
|
|
||||||
|
m_utf16_string = TRY(Utf16String::create(vm, move(code_units)));
|
||||||
|
m_is_rope = false;
|
||||||
|
m_lhs = nullptr;
|
||||||
|
m_rhs = nullptr;
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
// Now that we have all the pieces, we can concatenate them using a StringBuilder.
|
// Now that we have all the pieces, we can concatenate them using a StringBuilder.
|
||||||
ThrowableStringBuilder builder(vm);
|
ThrowableStringBuilder builder(vm);
|
||||||
|
|
||||||
|
|
|
@ -59,7 +59,11 @@ private:
|
||||||
|
|
||||||
virtual void visit_edges(Cell::Visitor&) override;
|
virtual void visit_edges(Cell::Visitor&) override;
|
||||||
|
|
||||||
ThrowCompletionOr<void> resolve_rope_if_needed() const;
|
enum class EncodingPreference {
|
||||||
|
UTF8,
|
||||||
|
UTF16,
|
||||||
|
};
|
||||||
|
ThrowCompletionOr<void> resolve_rope_if_needed(EncodingPreference) const;
|
||||||
|
|
||||||
mutable bool m_is_rope { false };
|
mutable bool m_is_rope { false };
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue