1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 04:27:44 +00:00

LibJS: Implement String.prototype.replace with UTF-16 code units

This commit is contained in:
Timothy Flynn 2021-07-19 16:53:35 -04:00 committed by Andreas Kling
parent bdbe716547
commit 06208aaa15
3 changed files with 64 additions and 23 deletions

View file

@ -10,6 +10,7 @@
#include <AK/Optional.h> #include <AK/Optional.h>
#include <AK/Result.h> #include <AK/Result.h>
#include <AK/TemporaryChange.h> #include <AK/TemporaryChange.h>
#include <AK/Utf16View.h>
#include <LibJS/Interpreter.h> #include <LibJS/Interpreter.h>
#include <LibJS/Parser.h> #include <LibJS/Parser.h>
#include <LibJS/Runtime/AbstractOperations.h> #include <LibJS/Runtime/AbstractOperations.h>
@ -583,36 +584,51 @@ String get_substitution(GlobalObject& global_object, String const& matched, Stri
if (vm.exception()) if (vm.exception())
return {}; return {};
// FIXME: Once RegExp.prototype supports UTF-16, this AO can take UTF-16 strings as parameters instead of having to transcode here.
auto utf16_matched = AK::utf8_to_utf16(matched);
auto match_length = utf16_matched.size();
auto utf16_string = AK::utf8_to_utf16(str);
Utf16View utf16_string_view { utf16_string };
auto string_length = utf16_string_view.length_in_code_units();
auto utf16_replace = AK::utf8_to_utf16(replace_string);
Utf16View utf16_replace_view { utf16_replace };
auto replace_length = utf16_replace_view.length_in_code_units();
StringBuilder result; StringBuilder result;
for (size_t i = 0; i < replace_string.length(); ++i) { for (size_t i = 0; i < replace_length; ++i) {
char curr = replace_string[i]; u16 curr = utf16_replace_view.code_unit_at(i);
if ((curr != '$') || (i + 1 >= replace_string.length())) { if ((curr != '$') || (i + 1 >= replace_length)) {
result.append(curr); result.append(curr);
continue; continue;
} }
char next = replace_string[i + 1]; u16 next = utf16_replace_view.code_unit_at(i + 1);
if (next == '$') { if (next == '$') {
result.append(next); result.append('$');
++i; ++i;
} else if (next == '&') { } else if (next == '&') {
result.append(matched); result.append(matched);
++i; ++i;
} else if (next == '`') { } else if (next == '`') {
result.append(str.substring_view(0, position)); auto substring = utf16_string_view.substring_view(0, position);
result.append(substring.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
++i; ++i;
} else if (next == '\'') { } else if (next == '\'') {
auto tail_pos = position + matched.length(); auto tail_pos = position + match_length;
if (tail_pos < str.length()) if (tail_pos < string_length) {
result.append(str.substring_view(tail_pos)); auto substring = utf16_string_view.substring_view(tail_pos);
result.append(substring.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
}
++i; ++i;
} else if (is_ascii_digit(next)) { } else if (is_ascii_digit(next)) {
bool is_two_digits = (i + 2 < replace_string.length()) && is_ascii_digit(replace_string[i + 2]); bool is_two_digits = (i + 2 < replace_length) && is_ascii_digit(utf16_replace_view.code_unit_at(i + 2));
auto capture_postition_string = replace_string.substring_view(i + 1, is_two_digits ? 2 : 1); auto capture_postition_string = utf16_replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8();
auto capture_position = capture_postition_string.to_uint(); auto capture_position = capture_postition_string.to_uint();
if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) { if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) {
@ -632,12 +648,20 @@ String get_substitution(GlobalObject& global_object, String const& matched, Stri
} }
} else if (next == '<') { } else if (next == '<') {
auto start_position = i + 2; auto start_position = i + 2;
auto end_position = replace_string.find('>', start_position); Optional<size_t> end_position;
for (size_t j = start_position; j < replace_length; ++j) {
if (utf16_replace_view.code_unit_at(j) == '>') {
end_position = j;
break;
}
}
if (named_captures.is_undefined() || !end_position.has_value()) { if (named_captures.is_undefined() || !end_position.has_value()) {
result.append(curr); result.append(curr);
} else { } else {
auto group_name = replace_string.substring(start_position, *end_position - start_position); auto group_name_view = utf16_replace_view.substring_view(start_position, *end_position - start_position);
auto group_name = group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
auto capture = named_captures.as_object().get(group_name); auto capture = named_captures.as_object().get(group_name);
if (vm.exception()) if (vm.exception())

View file

@ -903,24 +903,29 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace)
return {}; return {};
if (!replace_value.is_function()) { if (!replace_value.is_function()) {
auto replace_string = replace_value.to_string(global_object); auto replace_string = replace_value.to_utf16_string(global_object);
if (vm.exception()) if (vm.exception())
return {}; return {};
replace_value = js_string(vm, Utf16View { replace_string });
replace_value = js_string(vm, move(replace_string));
if (vm.exception()) if (vm.exception())
return {}; return {};
} }
Optional<size_t> position = string.find(search_string); auto utf16_string = AK::utf8_to_utf16(string);
if (!position.has_value()) Utf16View utf16_string_view { utf16_string };
return js_string(vm, string);
auto preserved = string.substring(0, position.value()); auto utf16_search_string = AK::utf8_to_utf16(search_string);
Utf16View utf16_search_view { utf16_search_string };
Optional<size_t> position = string_index_of(utf16_string_view, utf16_search_view, 0);
if (!position.has_value())
return js_string(vm, utf16_string_view);
auto preserved = utf16_string_view.substring_view(0, position.value());
String replacement; String replacement;
if (replace_value.is_function()) { if (replace_value.is_function()) {
auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, search_string), Value(position.value()), js_string(vm, string)); auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, utf16_search_view), Value(position.value()), js_string(vm, utf16_string_view));
if (vm.exception()) if (vm.exception())
return {}; return {};
@ -933,10 +938,12 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace)
return {}; return {};
} }
utf16_string_view = utf16_string_view.substring_view(*position + utf16_search_view.length_in_code_units());
StringBuilder builder; StringBuilder builder;
builder.append(preserved); builder.append(preserved.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
builder.append(replacement); builder.append(replacement);
builder.append(string.substring(position.value() + search_string.length())); builder.append(utf16_string_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
return js_string(vm, builder.build()); return js_string(vm, builder.build());
} }

View file

@ -232,3 +232,13 @@ test("override exec with non-function", () => {
re.exec = 3; re.exec = 3;
expect("test".replace(re, "x")).toBe("x"); expect("test".replace(re, "x")).toBe("x");
}); });
// String search terms are matched per UTF-16 code unit: the emoji below is made up of the
// surrogate pair \ud83d \ude00, and each half can be replaced independently of the other.
test("UTF-16", () => {
    const emoji = "😀";
    const highSurrogate = "\ud83d";
    const lowSurrogate = "\ude00";

    // Replacing the whole pair leaves nothing behind.
    expect(emoji.replace(emoji, "")).toBe("");

    // Replacing one half of the pair leaves the other (lone) surrogate in place.
    expect(emoji.replace(highSurrogate, "")).toBe(lowSurrogate);
    expect(emoji.replace(lowSurrogate, "")).toBe(highSurrogate);

    // FIXME: RegExp.prototype [ @@replace ] also needs to support UTF-16.
    // expect("😀".replace(/\ud83d/, "")).toBe("\ude00");
    // expect("😀".replace(/\ude00/, "")).toBe("\ud83d");
});