1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 15:27:35 +00:00

LibJS: Implement String.prototype.replaceAll with UTF-16 code units

This commit is contained in:
Timothy Flynn 2021-07-19 17:02:44 -04:00 committed by Andreas Kling
parent 06208aaa15
commit 733a92820b
2 changed files with 33 additions and 12 deletions

View file

@ -997,33 +997,40 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace_all)
return {}; return {};
if (!replace_value.is_function()) { if (!replace_value.is_function()) {
auto replace_string = replace_value.to_string(global_object); auto replace_string = replace_value.to_utf16_string(global_object);
if (vm.exception()) if (vm.exception())
return {}; return {};
replace_value = js_string(vm, Utf16View { replace_string });
replace_value = js_string(vm, move(replace_string));
if (vm.exception()) if (vm.exception())
return {}; return {};
} }
auto utf16_string = AK::utf8_to_utf16(string);
Utf16View utf16_string_view { utf16_string };
auto string_length = utf16_string_view.length_in_code_units();
auto utf16_search_string = AK::utf8_to_utf16(search_string);
Utf16View utf16_search_view { utf16_search_string };
auto search_length = utf16_search_view.length_in_code_units();
Vector<size_t> match_positions; Vector<size_t> match_positions;
size_t advance_by = max(1u, search_string.length()); size_t advance_by = max(1u, search_length);
auto position = string.find(search_string); auto position = string_index_of(utf16_string_view, utf16_search_view, 0);
while (position.has_value()) { while (position.has_value()) {
match_positions.append(*position); match_positions.append(*position);
position = string.find(search_string, *position + advance_by); position = string_index_of(utf16_string_view, utf16_search_view, *position + advance_by);
} }
size_t end_of_last_match = 0; size_t end_of_last_match = 0;
StringBuilder result; StringBuilder result;
for (auto position : match_positions) { for (auto position : match_positions) {
auto preserved = string.substring_view(end_of_last_match, position - end_of_last_match); auto preserved = utf16_string_view.substring_view(end_of_last_match, position - end_of_last_match);
String replacement; String replacement;
if (replace_value.is_function()) { if (replace_value.is_function()) {
auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, search_string), Value(position), js_string(vm, string)); auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, utf16_search_view), Value(position), js_string(vm, utf16_string_view));
if (vm.exception()) if (vm.exception())
return {}; return {};
@ -1036,14 +1043,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace_all)
return {}; return {};
} }
result.append(preserved); result.append(preserved.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
result.append(replacement); result.append(replacement);
end_of_last_match = position + search_string.length(); end_of_last_match = position + search_length;
} }
if (end_of_last_match < string.length()) if (end_of_last_match < string_length) {
result.append(string.substring_view(end_of_last_match)); utf16_string_view = utf16_string_view.substring_view(end_of_last_match);
result.append(utf16_string_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
}
return js_string(vm, result.build()); return js_string(vm, result.build());
} }

View file

@ -143,3 +143,15 @@ test("search value is coerced to a string", () => {
expect(newString).toBe("abc"); expect(newString).toBe("abc");
expect(coerced).toBe("x"); expect(coerced).toBe("x");
}); });
// Verifies that String.prototype.replaceAll matches on UTF-16 code units rather
// than on whole code points. "😀" (U+1F600) is encoded as the surrogate pair
// \ud83d \ude00, so replacing one half must leave the other half behind.
test("UTF-16", () => {
// Searching for the full surrogate pair removes the entire character.
expect("😀".replaceAll("😀", "")).toBe("");
// Searching for a lone high or low surrogate removes only that code unit.
expect("😀".replaceAll("\ud83d", "")).toBe("\ude00");
expect("😀".replaceAll("\ude00", "")).toBe("\ud83d");
// Same checks with repeated occurrences: every match is replaced, not just the first.
expect("😀😀😀".replaceAll("\ud83d", "")).toBe("\ude00\ude00\ude00");
expect("😀😀😀".replaceAll("\ude00", "")).toBe("\ud83d\ud83d\ud83d");
// FIXME: RegExp.prototype [ @@replace ] also needs to support UTF-16.
//        The regex-based equivalents below stay disabled until it does.
// expect("😀".replaceAll(/\ud83d/g, "")).toBe("\ude00");
// expect("😀".replaceAll(/\ude00/g, "")).toBe("\ud83d");
});