1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 05:27:43 +00:00

LibJS: Implement RegExp.prototype [ @@replace ] with UTF-16 code units

This also converts the GetSubstitution abstract operation to take its input
strings as UTF-16 now that all callers are UTF-16 capable. This means
String.prototype.replace (and replaceAll) no longer needs UTF-8 and
UTF-16 copies of these strings.
This commit is contained in:
Timothy Flynn 2021-07-22 10:38:10 -04:00 committed by Linus Groh
parent ee7b04f7bb
commit 5a8f870594
6 changed files with 67 additions and 71 deletions

View file

@ -576,59 +576,48 @@ Value canonical_numeric_index_string(GlobalObject& global_object, PropertyName c
}
// 22.1.3.17.1 GetSubstitution ( matched, str, position, captures, namedCaptures, replacement ), https://tc39.es/ecma262/#sec-getsubstitution
String get_substitution(GlobalObject& global_object, String const& matched, String const& str, size_t position, Vector<Value> const& captures, Value named_captures, Value replacement)
String get_substitution(GlobalObject& global_object, Utf16View const& matched, Utf16View const& str, size_t position, Vector<Value> const& captures, Value named_captures, Value replacement)
{
auto& vm = global_object.vm();
auto replace_string = replacement.to_string(global_object);
auto replace_string = replacement.to_utf16_string(global_object);
if (vm.exception())
return {};
// FIXME: Once RegExp.prototype supports UTF-16, this AO can take UTF-16 strings as parameters instead of having to transcode here.
auto utf16_matched = AK::utf8_to_utf16(matched);
auto match_length = utf16_matched.size();
auto utf16_string = AK::utf8_to_utf16(str);
Utf16View utf16_string_view { utf16_string };
auto string_length = utf16_string_view.length_in_code_units();
auto utf16_replace = AK::utf8_to_utf16(replace_string);
Utf16View utf16_replace_view { utf16_replace };
auto replace_length = utf16_replace_view.length_in_code_units();
Utf16View replace_view { replace_string };
StringBuilder result;
for (size_t i = 0; i < replace_length; ++i) {
u16 curr = utf16_replace_view.code_unit_at(i);
for (size_t i = 0; i < replace_view.length_in_code_units(); ++i) {
u16 curr = replace_view.code_unit_at(i);
if ((curr != '$') || (i + 1 >= replace_length)) {
if ((curr != '$') || (i + 1 >= replace_view.length_in_code_units())) {
result.append(curr);
continue;
}
u16 next = utf16_replace_view.code_unit_at(i + 1);
u16 next = replace_view.code_unit_at(i + 1);
if (next == '$') {
result.append('$');
++i;
} else if (next == '&') {
result.append(matched);
result.append(matched.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
++i;
} else if (next == '`') {
auto substring = utf16_string_view.substring_view(0, position);
auto substring = str.substring_view(0, position);
result.append(substring.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
++i;
} else if (next == '\'') {
auto tail_pos = position + match_length;
if (tail_pos < string_length) {
auto substring = utf16_string_view.substring_view(tail_pos);
auto tail_pos = position + matched.length_in_code_units();
if (tail_pos < str.length_in_code_units()) {
auto substring = str.substring_view(tail_pos);
result.append(substring.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
}
++i;
} else if (is_ascii_digit(next)) {
bool is_two_digits = (i + 2 < replace_length) && is_ascii_digit(utf16_replace_view.code_unit_at(i + 2));
bool is_two_digits = (i + 2 < replace_view.length_in_code_units()) && is_ascii_digit(replace_view.code_unit_at(i + 2));
auto capture_postition_string = utf16_replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8();
auto capture_postition_string = replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8();
auto capture_position = capture_postition_string.to_uint();
if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) {
@ -650,8 +639,8 @@ String get_substitution(GlobalObject& global_object, String const& matched, Stri
auto start_position = i + 2;
Optional<size_t> end_position;
for (size_t j = start_position; j < replace_length; ++j) {
if (utf16_replace_view.code_unit_at(j) == '>') {
for (size_t j = start_position; j < replace_view.length_in_code_units(); ++j) {
if (replace_view.code_unit_at(j) == '>') {
end_position = j;
break;
}
@ -660,7 +649,7 @@ String get_substitution(GlobalObject& global_object, String const& matched, Stri
if (named_captures.is_undefined() || !end_position.has_value()) {
result.append(curr);
} else {
auto group_name_view = utf16_replace_view.substring_view(start_position, *end_position - start_position);
auto group_name_view = replace_view.substring_view(start_position, *end_position - start_position);
auto group_name = group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
auto capture = named_captures.as_object().get(group_name);