1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-06-01 04:18:14 +00:00

LibJS: Implement String.prototype.codePointAt with UTF-16 code units

This also implements the CodePointAt abstract operation. This is needed
to handle invalid code units specific to the JavaScript spec, rather
than e.g. inserting replacement code units. This abstraction is public
because RegExp.prototype will also need it.
This commit is contained in:
Timothy Flynn 2021-07-19 11:34:29 -04:00 committed by Andreas Kling
parent 48a28a9a73
commit a05ce330b8
3 changed files with 68 additions and 8 deletions

View file

@ -51,6 +51,29 @@ static Optional<size_t> split_match(const String& haystack, size_t start, const
return start + r;
}
// 11.1.4 CodePointAt ( string, position ), https://tc39.es/ecma262/#sec-codepointat
// Decodes the code point that begins at code unit index `position` of `string`.
// The result holds the code point value, how many code units it spans (1 or 2),
// and whether the code unit at `position` is a surrogate with no valid partner.
// `position` must be a valid code unit index; this is enforced with VERIFY.
CodePoint code_point_at(Utf16View const& string, size_t position)
{
    auto const length = string.length_in_code_units();
    VERIFY(position < length);

    auto const lead = string.code_unit_at(position);
    auto const lead_value = static_cast<u32>(lead);

    // A low surrogate in leading position can never start a pair.
    if (Utf16View::is_low_surrogate(lead))
        return { lead_value, 1, true };

    // Anything that is not a surrogate at all stands for itself.
    if (!Utf16View::is_high_surrogate(lead))
        return { lead_value, 1, false };

    // From here on `lead` is a high surrogate; it only forms a pair when a
    // low surrogate immediately follows it inside the string.
    if (auto const next = position + 1; next != length) {
        auto const trail = string.code_unit_at(next);
        if (Utf16View::is_low_surrogate(trail)) {
            auto combined = lead_value;
            combined = Utf16View::decode_surrogate_pair(lead, trail);
            return { combined, 2, false };
        }
    }

    // High surrogate at the end of the string or followed by a non-low-surrogate.
    return { lead_value, 1, true };
}
StringPrototype::StringPrototype(GlobalObject& global_object)
: StringObject(*js_string(global_object.heap(), String::empty()), *global_object.object_prototype())
{
@ -162,19 +185,19 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::char_code_at)
// 22.1.3.3 String.prototype.codePointAt ( pos ), https://tc39.es/ecma262/#sec-string.prototype.codepointat
// Native implementation of codePointAt: converts the first argument to an integer position and
// returns the code point at that position, or undefined when the position is out of range.
// NOTE(review): this hunk lists pre-change and post-change lines back to back without +/- markers
// (note the two declarations of `string` and the two range checks). Presumably the
// ak_string_from / Utf8View / iterator lines are the removed ones and the utf16_string_from /
// Utf16View lines are their replacements - confirm against the actual commit.
JS_DEFINE_NATIVE_FUNCTION(StringPrototype::code_point_at)
{
auto string = ak_string_from(vm, global_object);
if (!string.has_value())
auto string = utf16_string_from(vm, global_object);
// Return an empty value if an exception is pending after fetching the string.
if (vm.exception())
return {};
// The position comes from the first argument, coerced with to_integer_or_infinity.
auto position = vm.argument(0).to_integer_or_infinity(global_object);
if (vm.exception())
return {};
auto view = Utf8View(*string);
if (position < 0 || position >= view.length())
Utf16View utf16_string_view { string };
// Positions below zero or at/after the code unit length yield undefined.
if (position < 0 || position >= utf16_string_view.length_in_code_units())
return js_undefined();
// Walks a Utf8View iterator forward `position` steps and returns the value it points at.
auto it = view.begin();
for (auto i = 0; i < position; ++i)
++it;
return Value(*it);
// Delegates to JS::code_point_at and returns only the numeric code point of its result.
auto code_point = JS::code_point_at(utf16_string_view, position);
return Value(code_point.code_point);
}
// 22.1.3.16 String.prototype.repeat ( count ), https://tc39.es/ecma262/#sec-string.prototype.repeat

View file

@ -10,6 +10,14 @@
namespace JS {
// Result of decoding a single code point from a UTF-16 string, as returned by code_point_at().
struct CodePoint {
// The decoded code point value; for an unpaired surrogate this is the surrogate code unit itself.
u32 code_point { 0 };
// Number of UTF-16 code units the code point spans (code_point_at() reports 1 or 2).
size_t code_unit_count { 0 };
// True when the code unit was a surrogate that did not form a valid pair.
bool is_unpaired_surrogate { false };
};
// CodePointAt abstract operation: decodes the code point starting at code unit index `position`.
// Exposed in the header because, per the commit message, RegExp.prototype will need it as well.
CodePoint code_point_at(Utf16View const& string, size_t position);
class StringPrototype final : public StringObject {
JS_OBJECT(StringPrototype, StringObject);

View file

@ -0,0 +1,29 @@
// Exercises String.prototype.codePointAt on a string made only of single-code-unit
// characters: every valid index, out-of-range indices, and position arguments
// that are expected to behave like position 0.
test("basic functionality", () => {
    // This file tests codePointAt, so check the arity of codePointAt itself
    // (the check previously targeted String.prototype.charAt by mistake).
    expect(String.prototype.codePointAt).toHaveLength(1);

    var s = "Foobar";
    expect(typeof s).toBe("string");
    expect(s).toHaveLength(6);

    // Each in-range index yields the character's code point.
    expect(s.codePointAt(0)).toBe(70);
    expect(s.codePointAt(1)).toBe(111);
    expect(s.codePointAt(2)).toBe(111);
    expect(s.codePointAt(3)).toBe(98);
    expect(s.codePointAt(4)).toBe(97);
    expect(s.codePointAt(5)).toBe(114);

    // Indices at/after the length or below zero yield undefined.
    expect(s.codePointAt(6)).toBe(undefined);
    expect(s.codePointAt(-1)).toBe(undefined);

    // A missing or non-numeric position reads the first character.
    expect(s.codePointAt()).toBe(70);
    expect(s.codePointAt(NaN)).toBe(70);
    expect(s.codePointAt("foo")).toBe(70);
    expect(s.codePointAt(undefined)).toBe(70);
});
// Checks codePointAt on a character that occupies two UTF-16 code units.
test("UTF-16", () => {
    const emoji = "😀";
    expect(emoji).toHaveLength(2);

    // [index, expected result]:
    //   0 -> the surrogate pair starting here decodes to the full code point
    //   1 -> the second code unit on its own is returned as-is
    //   2 -> past the end of the string
    const expectations = [
        [0, 0x1f600],
        [1, 0xde00],
        [2, undefined],
    ];

    for (const [index, expected] of expectations) {
        expect(emoji.codePointAt(index)).toBe(expected);
    }
});