mirror of
https://github.com/RGBCube/serenity
synced 2025-06-01 04:18:14 +00:00
LibJS: Implement String.prototype.codePointAt with UTF-16 code units
This also implements the CodePointAt abstract operation. This is needed to handle invalid code units specific to the JavaScript spec, rather than e.g. inserting replacement code units. This abstraction is public because RegExp.prototype will also need it.
This commit is contained in:
parent
48a28a9a73
commit
a05ce330b8
3 changed files with 68 additions and 8 deletions
|
@ -51,6 +51,29 @@ static Optional<size_t> split_match(const String& haystack, size_t start, const
|
|||
return start + r;
|
||||
}
|
||||
|
||||
// 11.1.4 CodePointAt ( string, position ), https://tc39.es/ecma262/#sec-codepointat
|
||||
CodePoint code_point_at(Utf16View const& string, size_t position)
{
    // The caller is responsible for passing an index inside the string.
    auto const length = string.length_in_code_units();
    VERIFY(position < length);

    auto const lead = string.code_unit_at(position);
    auto const lead_value = static_cast<u32>(lead);

    bool const lead_is_high = Utf16View::is_high_surrogate(lead);
    bool const lead_is_low = Utf16View::is_low_surrogate(lead);

    // Not a surrogate at all: the code unit is the code point.
    if (!lead_is_high && !lead_is_low)
        return { lead_value, 1, false };

    // A pair can only be formed when the unit at `position` is not a low
    // surrogate and another code unit follows it.
    if (!lead_is_low && (position + 1 != length)) {
        auto const trail = string.code_unit_at(position + 1);

        if (Utf16View::is_low_surrogate(trail)) {
            u32 const combined = Utf16View::decode_surrogate_pair(lead, trail);
            return { combined, 2, false };
        }
    }

    // Every remaining case is an unpaired surrogate; report the raw code
    // unit value instead of substituting a replacement character.
    return { lead_value, 1, true };
}
|
||||
|
||||
StringPrototype::StringPrototype(GlobalObject& global_object)
|
||||
: StringObject(*js_string(global_object.heap(), String::empty()), *global_object.object_prototype())
|
||||
{
|
||||
|
@ -162,19 +185,19 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::char_code_at)
|
|||
// 22.1.3.3 String.prototype.codePointAt ( pos ), https://tc39.es/ecma262/#sec-string.prototype.codepointat
|
||||
JS_DEFINE_NATIVE_FUNCTION(StringPrototype::code_point_at)
{
    // Obtain the receiver's string via utf16_string_from() (presumably as
    // UTF-16 code units - confirm against the helper); bail out if that threw.
    auto string = utf16_string_from(vm, global_object);
    if (vm.exception())
        return {};

    // Coerce the first argument to an integral position; the coercion may throw.
    auto position = vm.argument(0).to_integer_or_infinity(global_object);
    if (vm.exception())
        return {};

    // Positions are measured in UTF-16 code units, not in code points.
    Utf16View utf16_string_view { string };
    if (position < 0 || position >= utf16_string_view.length_in_code_units())
        return js_undefined();

    // The range check above guarantees the index is valid, which satisfies
    // the precondition asserted inside JS::code_point_at().
    auto code_point = JS::code_point_at(utf16_string_view, static_cast<size_t>(position));
    return Value(code_point.code_point);
}
|
||||
|
||||
// 22.1.3.16 String.prototype.repeat ( count ), https://tc39.es/ecma262/#sec-string.prototype.repeat
|
||||
|
|
|
@ -10,6 +10,14 @@
|
|||
|
||||
namespace JS {
|
||||
|
||||
struct CodePoint {
|
||||
u32 code_point { 0 };
|
||||
size_t code_unit_count { 0 };
|
||||
bool is_unpaired_surrogate { false };
|
||||
};
|
||||
|
||||
// CodePointAt ( string, position ), https://tc39.es/ecma262/#sec-codepointat
// Reads the code point that starts at code unit index `position` of `string`.
// `position` must be less than string.length_in_code_units(); the definition
// asserts this with VERIFY.
CodePoint code_point_at(Utf16View const& string, size_t position);
|
||||
|
||||
class StringPrototype final : public StringObject {
|
||||
JS_OBJECT(StringPrototype, StringObject);
|
||||
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
test("basic functionality", () => {
    // codePointAt takes a single `pos` parameter, so its `length` is 1.
    // (This used to check String.prototype.charAt, a copy-paste leftover.)
    expect(String.prototype.codePointAt).toHaveLength(1);

    var s = "Foobar";
    expect(typeof s).toBe("string");
    expect(s).toHaveLength(6);

    // In-range positions yield the code point of each ASCII character.
    expect(s.codePointAt(0)).toBe(70);
    expect(s.codePointAt(1)).toBe(111);
    expect(s.codePointAt(2)).toBe(111);
    expect(s.codePointAt(3)).toBe(98);
    expect(s.codePointAt(4)).toBe(97);
    expect(s.codePointAt(5)).toBe(114);

    // Out-of-range positions yield undefined.
    expect(s.codePointAt(6)).toBe(undefined);
    expect(s.codePointAt(-1)).toBe(undefined);

    // A missing or non-numeric position is coerced to 0.
    expect(s.codePointAt()).toBe(70);
    expect(s.codePointAt(NaN)).toBe(70);
    expect(s.codePointAt("foo")).toBe(70);
    expect(s.codePointAt(undefined)).toBe(70);
});
|
||||
|
||||
test("UTF-16", () => {
    // A single emoji outside the Basic Multilingual Plane occupies two
    // UTF-16 code units, so the string's length is 2.
    const emoji = "😀";
    expect(emoji).toHaveLength(2);

    // Index 0 starts the surrogate pair and decodes to the full code point,
    // index 1 lands on the lone trailing surrogate, index 2 is out of range.
    const expectedByIndex = [0x1f600, 0xde00, undefined];
    expectedByIndex.forEach((expected, index) => {
        expect(emoji.codePointAt(index)).toBe(expected);
    });
});
|
Loading…
Add table
Add a link
Reference in a new issue