From 70080feab2247e3e8bb7780f5f499826bb85513c Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Mon, 2 Aug 2021 17:02:17 -0400 Subject: [PATCH] AK+LibJS: Implement String.from{CharCode,CodePoint} using UTF-16 strings Most of String.prototype and RegExp.prototype is implemented with UTF-16 so this is to prevent extra copying of the string data. --- AK/Utf16View.cpp | 24 ++++++++++++------- AK/Utf16View.h | 1 + .../LibJS/Runtime/StringConstructor.cpp | 24 ++++++++++++------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp index 5b7e2eb6ad..d49ba18692 100644 --- a/AK/Utf16View.cpp +++ b/AK/Utf16View.cpp @@ -25,15 +25,8 @@ static Vector to_utf16_impl(UtfViewType const& view) requires(IsSame utf16_data; - for (auto code_point : view) { - if (code_point < first_supplementary_plane_code_point) { - utf16_data.append(static_cast(code_point)); - } else { - code_point -= first_supplementary_plane_code_point; - utf16_data.append(static_cast(high_surrogate_min | (code_point >> 10))); - utf16_data.append(static_cast(low_surrogate_min | (code_point & 0x3ff))); - } - } + for (auto code_point : view) + code_point_to_utf16(utf16_data, code_point); return utf16_data; } @@ -53,6 +46,19 @@ Vector utf32_to_utf16(Utf32View const& utf32_view) return to_utf16_impl(utf32_view); } +void code_point_to_utf16(Vector& string, u32 code_point) +{ + VERIFY(is_unicode(code_point)); + + if (code_point < first_supplementary_plane_code_point) { + string.append(static_cast(code_point)); + } else { + code_point -= first_supplementary_plane_code_point; + string.append(static_cast(high_surrogate_min | (code_point >> 10))); + string.append(static_cast(low_surrogate_min | (code_point & 0x3ff))); + } +} + bool Utf16View::is_high_surrogate(u16 code_unit) { return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max); diff --git a/AK/Utf16View.h b/AK/Utf16View.h index 054c9f4043..58e636adc0 100644 --- a/AK/Utf16View.h +++ b/AK/Utf16View.h @@ -18,6 +18,7 @@ namespace AK { Vector utf8_to_utf16(StringView const&); Vector utf8_to_utf16(Utf8View const&); Vector utf32_to_utf16(Utf32View const&); +void code_point_to_utf16(Vector&, u32); class Utf16View; diff --git a/Userland/Libraries/LibJS/Runtime/StringConstructor.cpp b/Userland/Libraries/LibJS/Runtime/StringConstructor.cpp index 80fa174d0a..f222e1b004 100644 --- a/Userland/Libraries/LibJS/Runtime/StringConstructor.cpp +++ b/Userland/Libraries/LibJS/Runtime/StringConstructor.cpp @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -124,22 +125,25 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::raw) // 22.1.2.1 String.fromCharCode ( ...codeUnits ), https://tc39.es/ecma262/#sec-string.fromcharcode JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_char_code) { - StringBuilder builder; + Vector string; + string.ensure_capacity(vm.argument_count()); + for (size_t i = 0; i < vm.argument_count(); ++i) { - auto char_code = vm.argument(i).to_i32(global_object); + auto code_unit = vm.argument(i).to_u16(global_object); if (vm.exception()) return {}; - auto truncated = char_code & 0xffff; - // FIXME: We need an Utf16View :^) - builder.append(Utf32View((u32*)&truncated, 1)); + string.append(code_unit); } - return js_string(vm, builder.build()); + + return js_string(vm, move(string)); } // 22.1.2.2 String.fromCodePoint ( ...codePoints ), https://tc39.es/ecma262/#sec-string.fromcodepoint JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point) { - StringBuilder builder; + Vector string; + string.ensure_capacity(vm.argument_count()); // This will be an under-estimate if any code point is > 0xffff. + for (size_t i = 0; i < vm.argument_count(); ++i) { auto next_code_point = vm.argument(i).to_number(global_object); if (vm.exception()) @@ -153,9 +157,11 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point) vm.throw_exception(global_object, ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects()); return {}; } - builder.append_code_point(code_point); + + AK::code_point_to_utf16(string, static_cast(code_point)); } - return js_string(vm, builder.build()); + + return js_string(vm, move(string)); } }