mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 11:07:45 +00:00
LibJS: Implement RegExp.prototype [ @@split ] with UTF-16 code units
This commit is contained in:
parent
66c31a0c07
commit
ee7b04f7bb
2 changed files with 19 additions and 14 deletions
|
@@ -838,9 +838,11 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_split)
|
||||||
auto* regexp_object = this_object_from(vm, global_object);
|
auto* regexp_object = this_object_from(vm, global_object);
|
||||||
if (!regexp_object)
|
if (!regexp_object)
|
||||||
return {};
|
return {};
|
||||||
auto string = vm.argument(0).to_string(global_object);
|
|
||||||
|
auto string = vm.argument(0).to_utf16_string(global_object);
|
||||||
if (vm.exception())
|
if (vm.exception())
|
||||||
return {};
|
return {};
|
||||||
|
Utf16View string_view { string };
|
||||||
|
|
||||||
auto* constructor = species_constructor(global_object, *regexp_object, *global_object.regexp_constructor());
|
auto* constructor = species_constructor(global_object, *regexp_object, *global_object.regexp_constructor());
|
||||||
if (vm.exception())
|
if (vm.exception())
|
||||||
|
@@ -879,28 +881,28 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_split)
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
return array;
|
return array;
|
||||||
|
|
||||||
if (string.is_empty()) {
|
if (string_view.is_empty()) {
|
||||||
auto result = regexp_exec(global_object, *splitter, string);
|
auto result = regexp_exec(global_object, *splitter, string_view);
|
||||||
if (!result.is_null())
|
if (!result.is_null())
|
||||||
return array;
|
return array;
|
||||||
|
|
||||||
array->create_data_property_or_throw(0, js_string(vm, string));
|
array->create_data_property_or_throw(0, js_string(vm, string_view));
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t last_match_end = 0; // 'p' in the spec.
|
size_t last_match_end = 0; // 'p' in the spec.
|
||||||
size_t next_search_from = 0; // 'q' in the spec.
|
size_t next_search_from = 0; // 'q' in the spec.
|
||||||
|
|
||||||
while (next_search_from < string.length()) {
|
while (next_search_from < string_view.length_in_code_units()) {
|
||||||
splitter->set(vm.names.lastIndex, Value(next_search_from), Object::ShouldThrowExceptions::Yes);
|
splitter->set(vm.names.lastIndex, Value(next_search_from), Object::ShouldThrowExceptions::Yes);
|
||||||
if (vm.exception())
|
if (vm.exception())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
auto result = regexp_exec(global_object, *splitter, string);
|
auto result = regexp_exec(global_object, *splitter, string_view);
|
||||||
if (vm.exception())
|
if (vm.exception())
|
||||||
return {};
|
return {};
|
||||||
if (result.is_null()) {
|
if (result.is_null()) {
|
||||||
next_search_from = advance_string_index(string, next_search_from, unicode);
|
next_search_from = advance_string_index(string_view, next_search_from, unicode);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -910,14 +912,14 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_split)
|
||||||
auto last_index = last_index_value.to_length(global_object); // 'e' in the spec.
|
auto last_index = last_index_value.to_length(global_object); // 'e' in the spec.
|
||||||
if (vm.exception())
|
if (vm.exception())
|
||||||
return {};
|
return {};
|
||||||
last_index = min(last_index, string.length());
|
last_index = min(last_index, string_view.length_in_code_units());
|
||||||
|
|
||||||
if (last_index == last_match_end) {
|
if (last_index == last_match_end) {
|
||||||
next_search_from = advance_string_index(string, next_search_from, unicode);
|
next_search_from = advance_string_index(string_view, next_search_from, unicode);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto substring = string.substring(last_match_end, next_search_from - last_match_end);
|
auto substring = string_view.substring_view(last_match_end, next_search_from - last_match_end);
|
||||||
array->create_data_property_or_throw(array_length, js_string(vm, move(substring)));
|
array->create_data_property_or_throw(array_length, js_string(vm, move(substring)));
|
||||||
|
|
||||||
if (++array_length == limit)
|
if (++array_length == limit)
|
||||||
|
@@ -946,7 +948,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_split)
|
||||||
next_search_from = last_index;
|
next_search_from = last_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto substring = string.substring(last_match_end);
|
auto substring = string_view.substring_view(last_match_end);
|
||||||
array->create_data_property_or_throw(array_length, js_string(vm, move(substring)));
|
array->create_data_property_or_throw(array_length, js_string(vm, move(substring)));
|
||||||
|
|
||||||
return array;
|
return array;
|
||||||
|
|
|
@@ -73,7 +73,10 @@ test("UTF-16", () => {
|
||||||
expect(s.split("\ud83d")).toEqual(["", "\ude00"]);
|
expect(s.split("\ud83d")).toEqual(["", "\ude00"]);
|
||||||
expect(s.split("\ude00")).toEqual(["\ud83d", ""]);
|
expect(s.split("\ude00")).toEqual(["\ud83d", ""]);
|
||||||
|
|
||||||
// FIXME: RegExp.prototype [ @@split ] also needs to support UTF-16.
|
expect(s.split(/\ud83d/)).toEqual(["", "\ude00"]);
|
||||||
// expect(s.split(/\ud83d/)).toEqual(["", "\ude00"]);
|
expect(s.split(/\ude00/)).toEqual(["\ud83d", ""]);
|
||||||
// expect(s.split(/\ude00/)).toEqual(["\ud83d", ""]);
|
|
||||||
|
s = "😀😀😀";
|
||||||
|
expect(s.split(/\ud83d/)).toEqual(["", "\ude00", "\ude00", "\ude00"]);
|
||||||
|
expect(s.split(/\ude00/)).toEqual(["\ud83d", "\ud83d", "\ud83d", ""]);
|
||||||
});
|
});
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue