1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 21:47:46 +00:00

LibJS: Implement string concatenation using ropes

Instead of concatenating string data every time you add two strings
together in JavaScript, we now create a new PrimitiveString that simply
points to the two strings being concatenated.

This turns concatenated strings into a tree structure that doesn't have
to be serialized until someone wants the characters in the string.

This *dramatically* reduces the peak memory footprint when running
the SunSpider benchmark (from ~6G to ~1G on my machine). It's also
significantly faster (1.39x) :^)
This commit is contained in:
Andreas Kling 2022-08-05 23:58:47 +02:00
parent cf62d08b2a
commit 64b29eb459
3 changed files with 172 additions and 59 deletions

View file

@ -1074,58 +1074,6 @@ ThrowCompletionOr<Value> unsigned_right_shift(GlobalObject& global_object, Value
return vm.throw_completion<TypeError>(global_object, ErrorType::BigIntBadOperator, "unsigned right-shift");
}
// https://tc39.es/ecma262/#string-concatenation
// Builds a new PrimitiveString holding the contents of `lhs` followed by `rhs`.
// When both operands already carry a UTF-16 string, their code units are joined
// directly. Otherwise the UTF-8 strings are joined, and a high surrogate at the
// very end of `lhs` followed by a low surrogate at the very start of `rhs` is
// merged into the single code point that the pair encodes.
static PrimitiveString* concatenate_strings(GlobalObject& global_object, PrimitiveString const& lhs, PrimitiveString const& rhs)
{
    auto& vm = global_object.vm();

    // UTF-16 path: only taken when both sides have a UTF-16 string.
    if (lhs.has_utf16_string() && rhs.has_utf16_string()) {
        auto const& left_utf16 = lhs.utf16_string();
        auto const& right_utf16 = rhs.utf16_string();

        Vector<u16, 1> code_units;
        code_units.ensure_capacity(left_utf16.length_in_code_units() + right_utf16.length_in_code_units());
        code_units.extend(left_utf16.string());
        code_units.extend(right_utf16.string());
        return js_string(vm, Utf16String(move(code_units)));
    }

    auto const& left = lhs.string();
    auto const& right = rhs.string();
    StringBuilder builder(left.length() + right.length());

    // Surrogates encoded as UTF-8 are 3 bytes.
    constexpr size_t surrogate_byte_count = 3;

    if (left.length() >= surrogate_byte_count && right.length() >= surrogate_byte_count) {
        auto const tail_offset = left.length() - surrogate_byte_count;
        auto const left_lead = static_cast<u8>(left[tail_offset]);
        auto const right_lead = static_cast<u8>(right[0]);

        // Only decode when both candidate bytes are 3-byte sequence leaders (0b1110xxxx).
        if ((left_lead & 0xf0) == 0xe0 && (right_lead & 0xf0) == 0xe0) {
            auto trailing_code_point = *Utf8View(left.substring_view(tail_offset)).begin();
            auto leading_code_point = *Utf8View(right).begin();

            if (Utf16View::is_high_surrogate(trailing_code_point) && Utf16View::is_low_surrogate(leading_code_point)) {
                // Replace the two separately encoded surrogates with the code point they form.
                builder.append(left.substring_view(0, tail_offset));
                builder.append_code_point(Utf16View::decode_surrogate_pair(trailing_code_point, leading_code_point));
                builder.append(right.substring_view(surrogate_byte_count));
                return js_string(vm, builder.to_string());
            }
        }
    }

    // No surrogate pair straddles the boundary: plain byte-wise concatenation.
    builder.append(left);
    builder.append(right);
    return js_string(vm, builder.to_string());
}
// 13.8.1 The Addition Operator ( + ), https://tc39.es/ecma262/#sec-addition-operator-plus
ThrowCompletionOr<Value> add(GlobalObject& global_object, Value lhs, Value rhs)
{
@ -1146,7 +1094,7 @@ ThrowCompletionOr<Value> add(GlobalObject& global_object, Value lhs, Value rhs)
if (lhs_primitive.is_string() || rhs_primitive.is_string()) {
auto lhs_string = TRY(lhs_primitive.to_primitive_string(global_object));
auto rhs_string = TRY(rhs_primitive.to_primitive_string(global_object));
return concatenate_strings(global_object, *lhs_string, *rhs_string);
return js_rope_string(global_object.vm(), *lhs_string, *rhs_string);
}
auto lhs_numeric = TRY(lhs_primitive.to_numeric(global_object));