mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 12:07:45 +00:00
LibJS: Implement String.prototype.split with UTF-16 code units
This also required implementing the SplitMatch abstract operation in terms of UTF-16 code units.
This commit is contained in:
parent
733a92820b
commit
d3c25593b9
2 changed files with 38 additions and 17 deletions
|
@ -40,14 +40,17 @@ static Vector<u16> utf16_string_from(VM& vm, GlobalObject& global_object)
|
||||||
return this_value.to_utf16_string(global_object);
|
return this_value.to_utf16_string(global_object);
|
||||||
}
|
}
|
||||||
|
|
||||||
static Optional<size_t> split_match(const String& haystack, size_t start, const String& needle)
|
// 22.1.3.21.1 SplitMatch ( S, q, R ), https://tc39.es/ecma262/#sec-splitmatch
|
||||||
|
static Optional<size_t> split_match(Utf16View const& haystack, size_t start, Utf16View const& needle)
|
||||||
{
|
{
|
||||||
auto r = needle.length();
|
auto r = needle.length_in_code_units();
|
||||||
auto s = haystack.length();
|
auto s = haystack.length_in_code_units();
|
||||||
if (start + r > s)
|
if (start + r > s)
|
||||||
return {};
|
return {};
|
||||||
if (!haystack.substring_view(start).starts_with(needle))
|
for (size_t i = 0; i < r; ++i) {
|
||||||
|
if (haystack.code_unit_at(start + i) != needle.code_unit_at(i))
|
||||||
return {};
|
return {};
|
||||||
|
}
|
||||||
return start + r;
|
return start + r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -676,7 +679,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::split)
|
||||||
return vm.call(*splitter, separator_argument, object, limit_argument);
|
return vm.call(*splitter, separator_argument, object, limit_argument);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto string = object.to_string(global_object);
|
auto string = object.to_utf16_string(global_object);
|
||||||
if (vm.exception())
|
if (vm.exception())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
|
@ -690,34 +693,40 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::split)
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
auto separator = separator_argument.to_string(global_object);
|
auto separator = separator_argument.to_utf16_string(global_object);
|
||||||
if (vm.exception())
|
if (vm.exception())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
return array;
|
return array;
|
||||||
|
|
||||||
|
Utf16View utf16_string_view { string };
|
||||||
|
auto string_length = utf16_string_view.length_in_code_units();
|
||||||
|
|
||||||
|
Utf16View utf16_separator_view { separator };
|
||||||
|
auto separator_length = utf16_separator_view.length_in_code_units();
|
||||||
|
|
||||||
if (separator_argument.is_undefined()) {
|
if (separator_argument.is_undefined()) {
|
||||||
array->create_data_property_or_throw(0, js_string(vm, string));
|
array->create_data_property_or_throw(0, js_string(vm, utf16_string_view));
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (string.length() == 0) {
|
if (string_length == 0) {
|
||||||
if (!separator.is_empty())
|
if (separator_length > 0)
|
||||||
array->create_data_property_or_throw(0, js_string(vm, string));
|
array->create_data_property_or_throw(0, js_string(vm, utf16_string_view));
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t start = 0;
|
size_t start = 0; // 'p' in the spec.
|
||||||
auto position = start;
|
auto position = start; // 'q' in the spec.
|
||||||
while (position != string.length()) {
|
while (position != string_length) {
|
||||||
auto match = split_match(string, position, separator);
|
auto match = split_match(utf16_string_view, position, utf16_separator_view); // 'e' in the spec.
|
||||||
if (!match.has_value() || match.value() == start) {
|
if (!match.has_value() || match.value() == start) {
|
||||||
++position;
|
++position;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto segment = string.substring_view(start, position - start);
|
auto segment = utf16_string_view.substring_view(start, position - start);
|
||||||
array->create_data_property_or_throw(array_length, js_string(vm, segment));
|
array->create_data_property_or_throw(array_length, js_string(vm, segment));
|
||||||
++array_length;
|
++array_length;
|
||||||
if (array_length == limit)
|
if (array_length == limit)
|
||||||
|
@ -726,7 +735,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::split)
|
||||||
position = start;
|
position = start;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto rest = string.substring(start);
|
auto rest = utf16_string_view.substring_view(start);
|
||||||
array->create_data_property_or_throw(array_length, js_string(vm, rest));
|
array->create_data_property_or_throw(array_length, js_string(vm, rest));
|
||||||
|
|
||||||
return array;
|
return array;
|
||||||
|
|
|
@ -65,3 +65,15 @@ test("regex split", () => {
|
||||||
"",
|
"",
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("UTF-16", () => {
|
||||||
|
var s = "😀";
|
||||||
|
expect(s.split()).toEqual(["😀"]);
|
||||||
|
expect(s.split("😀")).toEqual(["", ""]);
|
||||||
|
expect(s.split("\ud83d")).toEqual(["", "\ude00"]);
|
||||||
|
expect(s.split("\ude00")).toEqual(["\ud83d", ""]);
|
||||||
|
|
||||||
|
// FIXME: RegExp.prototype [ @@split ] also needs to support UTF-16.
|
||||||
|
// expect(s.split(/\ud83d/)).toEqual(["", "\ude00"]);
|
||||||
|
// expect(s.split(/\ude00/)).toEqual(["\ud83d", ""]);
|
||||||
|
});
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue