From 733a92820bfe1e4e30531c65226a06539d5dea11 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Mon, 19 Jul 2021 17:02:44 -0400 Subject: [PATCH] LibJS: Implement String.prototype.replaceAll with UTF-16 code units --- .../LibJS/Runtime/StringPrototype.cpp | 33 ++++++++++++------- .../String/String.prototype.replaceAll.js | 12 +++++++ 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp index f98e93ec20..acd43b7063 100644 --- a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp @@ -997,33 +997,40 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace_all) return {}; if (!replace_value.is_function()) { - auto replace_string = replace_value.to_string(global_object); + auto replace_string = replace_value.to_utf16_string(global_object); if (vm.exception()) return {}; - - replace_value = js_string(vm, move(replace_string)); + replace_value = js_string(vm, Utf16View { replace_string }); if (vm.exception()) return {}; } + auto utf16_string = AK::utf8_to_utf16(string); + Utf16View utf16_string_view { utf16_string }; + auto string_length = utf16_string_view.length_in_code_units(); + + auto utf16_search_string = AK::utf8_to_utf16(search_string); + Utf16View utf16_search_view { utf16_search_string }; + auto search_length = utf16_search_view.length_in_code_units(); + Vector<size_t> match_positions; - size_t advance_by = max(1u, search_string.length()); - auto position = string.find(search_string); + size_t advance_by = max(1u, search_length); + auto position = string_index_of(utf16_string_view, utf16_search_view, 0); while (position.has_value()) { match_positions.append(*position); - position = string.find(search_string, *position + advance_by); + position = string_index_of(utf16_string_view, utf16_search_view, *position + advance_by); } size_t end_of_last_match = 0; StringBuilder result; for (auto position : 
match_positions) { - auto preserved = string.substring_view(end_of_last_match, position - end_of_last_match); + auto preserved = utf16_string_view.substring_view(end_of_last_match, position - end_of_last_match); String replacement; if (replace_value.is_function()) { - auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, search_string), Value(position), js_string(vm, string)); + auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, utf16_search_view), Value(position), js_string(vm, utf16_string_view)); if (vm.exception()) return {}; @@ -1036,14 +1043,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace_all) return {}; } - result.append(preserved); + result.append(preserved.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); result.append(replacement); - end_of_last_match = position + search_string.length(); + end_of_last_match = position + search_length; } - if (end_of_last_match < string.length()) - result.append(string.substring_view(end_of_last_match)); + if (end_of_last_match < string_length) { + utf16_string_view = utf16_string_view.substring_view(end_of_last_match); + result.append(utf16_string_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + } return js_string(vm, result.build()); } diff --git a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replaceAll.js b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replaceAll.js index d2a6d3be9f..271bcc2bc9 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replaceAll.js +++ b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replaceAll.js @@ -143,3 +143,15 @@ test("search value is coerced to a string", () => { expect(newString).toBe("abc"); expect(coerced).toBe("x"); }); + +test("UTF-16", () => { + expect("😀".replaceAll("😀", "")).toBe(""); + expect("😀".replaceAll("\ud83d", "")).toBe("\ude00"); + expect("😀".replaceAll("\ude00", "")).toBe("\ud83d"); + expect("😀😀😀".replaceAll("\ud83d", 
"")).toBe("\ude00\ude00\ude00"); + expect("😀😀😀".replaceAll("\ude00", "")).toBe("\ud83d\ud83d\ud83d"); + + // FIXME: RegExp.prototype [ @@replace ] also needs to support UTF-16. + // expect("😀".replaceAll(/\ud83d/g, "")).toBe("\ude00"); + // expect("😀".replaceAll(/\ude00/g, "")).toBe("\ud83d"); +});