diff --git a/Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp b/Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp index 5775335d9b..80ec6fb56f 100644 --- a/Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp +++ b/Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -583,36 +584,51 @@ String get_substitution(GlobalObject& global_object, String const& matched, Stri if (vm.exception()) return {}; + // FIXME: Once RegExp.prototype supports UTF-16, this AO can take UTF-16 strings as parameters instead of having to transcode here. + auto utf16_matched = AK::utf8_to_utf16(matched); + auto match_length = utf16_matched.size(); + + auto utf16_string = AK::utf8_to_utf16(str); + Utf16View utf16_string_view { utf16_string }; + auto string_length = utf16_string_view.length_in_code_units(); + + auto utf16_replace = AK::utf8_to_utf16(replace_string); + Utf16View utf16_replace_view { utf16_replace }; + auto replace_length = utf16_replace_view.length_in_code_units(); + StringBuilder result; - for (size_t i = 0; i < replace_string.length(); ++i) { - char curr = replace_string[i]; + for (size_t i = 0; i < replace_length; ++i) { + u16 curr = utf16_replace_view.code_unit_at(i); - if ((curr != '$') || (i + 1 >= replace_string.length())) { + if ((curr != '$') || (i + 1 >= replace_length)) { result.append(curr); continue; } - char next = replace_string[i + 1]; + u16 next = utf16_replace_view.code_unit_at(i + 1); if (next == '$') { - result.append(next); + result.append('$'); ++i; } else if (next == '&') { result.append(matched); ++i; } else if (next == '`') { - result.append(str.substring_view(0, position)); + auto substring = utf16_string_view.substring_view(0, position); + result.append(substring.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); ++i; } else if (next == '\'') { - auto tail_pos = position + matched.length(); - if (tail_pos < str.length()) - result.append(str.substring_view(tail_pos)); + auto tail_pos = position + match_length; + if (tail_pos < string_length) { + auto substring = utf16_string_view.substring_view(tail_pos); + result.append(substring.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + } ++i; } else if (is_ascii_digit(next)) { - bool is_two_digits = (i + 2 < replace_string.length()) && is_ascii_digit(replace_string[i + 2]); + bool is_two_digits = (i + 2 < replace_length) && is_ascii_digit(utf16_replace_view.code_unit_at(i + 2)); - auto capture_postition_string = replace_string.substring_view(i + 1, is_two_digits ? 2 : 1); + auto capture_postition_string = utf16_replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8(); auto capture_position = capture_postition_string.to_uint(); if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) { @@ -632,12 +648,20 @@ String get_substitution(GlobalObject& global_object, String const& matched, Stri } } else if (next == '<') { auto start_position = i + 2; - auto end_position = replace_string.find('>', start_position); + Optional end_position; + + for (size_t j = start_position; j < replace_length; ++j) { + if (utf16_replace_view.code_unit_at(j) == '>') { + end_position = j; + break; + } + } if (named_captures.is_undefined() || !end_position.has_value()) { result.append(curr); } else { - auto group_name = replace_string.substring(start_position, *end_position - start_position); + auto group_name_view = utf16_replace_view.substring_view(start_position, *end_position - start_position); + auto group_name = group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); auto capture = named_captures.as_object().get(group_name); if (vm.exception()) diff --git a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp index 8112b7d145..f98e93ec20 100644 --- a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp @@ -903,24 +903,29 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace) return {}; if (!replace_value.is_function()) { - auto replace_string = replace_value.to_string(global_object); + auto replace_string = replace_value.to_utf16_string(global_object); if (vm.exception()) return {}; - - replace_value = js_string(vm, move(replace_string)); + replace_value = js_string(vm, Utf16View { replace_string }); if (vm.exception()) return {}; } - Optional position = string.find(search_string); - if (!position.has_value()) - return js_string(vm, string); + auto utf16_string = AK::utf8_to_utf16(string); + Utf16View utf16_string_view { utf16_string }; - auto preserved = string.substring(0, position.value()); + auto utf16_search_string = AK::utf8_to_utf16(search_string); + Utf16View utf16_search_view { utf16_search_string }; + + Optional position = string_index_of(utf16_string_view, utf16_search_view, 0); + if (!position.has_value()) + return js_string(vm, utf16_string_view); + + auto preserved = utf16_string_view.substring_view(0, position.value()); String replacement; if (replace_value.is_function()) { - auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, search_string), Value(position.value()), js_string(vm, string)); + auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, utf16_search_view), Value(position.value()), js_string(vm, utf16_string_view)); if (vm.exception()) return {}; @@ -933,10 +938,12 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace) return {}; } + utf16_string_view = utf16_string_view.substring_view(*position + utf16_search_view.length_in_code_units()); + StringBuilder builder; - builder.append(preserved); + builder.append(preserved.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); builder.append(replacement); - builder.append(string.substring(position.value() + search_string.length())); + builder.append(utf16_string_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); return js_string(vm, builder.build()); } diff --git a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replace.js b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replace.js index 446ff62816..b61c0e77e2 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replace.js +++ b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replace.js @@ -232,3 +232,13 @@ test("override exec with non-function", () => { re.exec = 3; expect("test".replace(re, "x")).toBe("x"); }); + +test("UTF-16", () => { + expect("😀".replace("😀", "")).toBe(""); + expect("😀".replace("\ud83d", "")).toBe("\ude00"); + expect("😀".replace("\ude00", "")).toBe("\ud83d"); + + // FIXME: RegExp.prototype [ @@replace ] also needs to support UTF-16. + // expect("😀".replace(/\ud83d/, "")).toBe("\ude00"); + // expect("😀".replace(/\ude00/, "")).toBe("\ud83d"); +});