1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 15:17:36 +00:00

LibJS: Implement RegExp.prototype [ @@match ] with UTF-16 code units

This commit is contained in:
Timothy Flynn 2021-07-22 10:19:27 -04:00 committed by Linus Groh
parent b1ea9c20b0
commit 2c023157e9
6 changed files with 103 additions and 16 deletions

View file

@ -112,7 +112,7 @@ size_t advance_string_index(String const& string, size_t index, bool unicode)
return advance_string_index(utf16_string_view, index, unicode); return advance_string_index(utf16_string_view, index, unicode);
} }
static void increment_last_index(GlobalObject& global_object, Object& regexp_object, String const& string, bool unicode) static void increment_last_index(GlobalObject& global_object, Object& regexp_object, Utf16View const& string, bool unicode)
{ {
auto& vm = global_object.vm(); auto& vm = global_object.vm();
@ -127,6 +127,14 @@ static void increment_last_index(GlobalObject& global_object, Object& regexp_obj
regexp_object.set(vm.names.lastIndex, Value(last_index), Object::ShouldThrowExceptions::Yes); regexp_object.set(vm.names.lastIndex, Value(last_index), Object::ShouldThrowExceptions::Yes);
} }
// Overload for callers that hold a String rather than UTF-16 data: the string is
// converted with AK::utf8_to_utf16 and the work is forwarded to the Utf16View overload.
static void increment_last_index(GlobalObject& global_object, Object& regexp_object, String const& string, bool unicode)
{
    // The converted code units are kept in a local so the view passed below stays valid for the whole call.
    auto code_units = AK::utf8_to_utf16(string);
    increment_last_index(global_object, regexp_object, Utf16View { code_units }, unicode);
}
// 1.1.2.1 Match Records, https://tc39.es/proposal-regexp-match-indices/#sec-match-records // 1.1.2.1 Match Records, https://tc39.es/proposal-regexp-match-indices/#sec-match-records
struct Match { struct Match {
static Match create(regex::Match const& match) static Match create(regex::Match const& match)
@ -485,9 +493,11 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
auto* regexp_object = this_object_from(vm, global_object); auto* regexp_object = this_object_from(vm, global_object);
if (!regexp_object) if (!regexp_object)
return {}; return {};
auto s = vm.argument(0).to_string(global_object);
auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception()) if (vm.exception())
return {}; return {};
Utf16View string_view { string };
auto global_value = regexp_object->get(vm.names.global); auto global_value = regexp_object->get(vm.names.global);
if (vm.exception()) if (vm.exception())
@ -495,7 +505,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
bool global = global_value.to_boolean(); bool global = global_value.to_boolean();
if (!global) { if (!global) {
auto result = regexp_exec(global_object, *regexp_object, s); auto result = regexp_exec(global_object, *regexp_object, string_view);
if (vm.exception()) if (vm.exception())
return {}; return {};
return result; return result;
@ -517,7 +527,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
size_t n = 0; size_t n = 0;
while (true) { while (true) {
auto result = regexp_exec(global_object, *regexp_object, s); auto result = regexp_exec(global_object, *regexp_object, string_view);
if (vm.exception()) if (vm.exception())
return {}; return {};
@ -542,7 +552,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
return {}; return {};
if (match_str.is_empty()) { if (match_str.is_empty()) {
increment_last_index(global_object, *regexp_object, s, unicode); increment_last_index(global_object, *regexp_object, string_view, unicode);
if (vm.exception()) if (vm.exception())
return {}; return {};
} }
@ -558,7 +568,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match_all)
if (!regexp_object) if (!regexp_object)
return {}; return {};
auto string = vm.argument(0).to_string(global_object); auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception()) if (vm.exception())
return {}; return {};

View file

@ -10,12 +10,12 @@
namespace JS { namespace JS {
// 22.2.7.1 CreateRegExpStringIterator ( R, S, global, fullUnicode ), https://tc39.es/ecma262/#sec-createregexpstringiterator // 22.2.7.1 CreateRegExpStringIterator ( R, S, global, fullUnicode ), https://tc39.es/ecma262/#sec-createregexpstringiterator
RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, String string, bool global, bool unicode) RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, Vector<u16> string, bool global, bool unicode)
{ {
return global_object.heap().allocate<RegExpStringIterator>(global_object, *global_object.regexp_string_iterator_prototype(), regexp_object, move(string), global, unicode); return global_object.heap().allocate<RegExpStringIterator>(global_object, *global_object.regexp_string_iterator_prototype(), regexp_object, move(string), global, unicode);
} }
RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, String string, bool global, bool unicode) RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode)
: Object(prototype) : Object(prototype)
, m_regexp_object(regexp_object) , m_regexp_object(regexp_object)
, m_string(move(string)) , m_string(move(string))

View file

@ -6,6 +6,7 @@
#pragma once #pragma once
#include <AK/Utf16View.h>
#include <LibJS/Runtime/Object.h> #include <LibJS/Runtime/Object.h>
namespace JS { namespace JS {
@ -14,13 +15,13 @@ class RegExpStringIterator final : public Object {
JS_OBJECT(RegExpStringIterator, Object); JS_OBJECT(RegExpStringIterator, Object);
public: public:
static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, String string, bool global, bool unicode); static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, Vector<u16> string, bool global, bool unicode);
explicit RegExpStringIterator(Object& prototype, Object& regexp_object, String string, bool global, bool unicode); explicit RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode);
virtual ~RegExpStringIterator() override = default; virtual ~RegExpStringIterator() override = default;
Object& regexp_object() { return m_regexp_object; } Object& regexp_object() { return m_regexp_object; }
String const& string() const { return m_string; } Utf16View string() const { return Utf16View { m_string }; }
bool global() const { return m_global; } bool global() const { return m_global; }
bool unicode() const { return m_unicode; } bool unicode() const { return m_unicode; }
@ -31,7 +32,7 @@ private:
virtual void visit_edges(Cell::Visitor&) override; virtual void visit_edges(Cell::Visitor&) override;
Object& m_regexp_object; Object& m_regexp_object;
String m_string; Vector<u16> m_string;
bool m_global { false }; bool m_global { false };
bool m_unicode { false }; bool m_unicode { false };
bool m_done { false }; bool m_done { false };

View file

@ -839,13 +839,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match)
if (vm.exception()) if (vm.exception())
return {}; return {};
} }
auto s = this_object.to_string(global_object);
auto string = this_object.to_utf16_string(global_object);
if (vm.exception()) if (vm.exception())
return {}; return {};
Utf16View utf16_string_view { string };
auto rx = regexp_create(global_object, regexp, js_undefined()); auto rx = regexp_create(global_object, regexp, js_undefined());
if (!rx) if (!rx)
return {}; return {};
return rx->invoke(*vm.well_known_symbol_match(), js_string(vm, s)); return rx->invoke(*vm.well_known_symbol_match(), js_string(vm, utf16_string_view));
} }
// 22.1.3.12 String.prototype.matchAll ( regexp ), https://tc39.es/ecma262/#sec-string.prototype.matchall // 22.1.3.12 String.prototype.matchAll ( regexp ), https://tc39.es/ecma262/#sec-string.prototype.matchall
@ -879,13 +882,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match_all)
if (vm.exception()) if (vm.exception())
return {}; return {};
} }
auto s = this_object.to_string(global_object);
auto string = this_object.to_utf16_string(global_object);
if (vm.exception()) if (vm.exception())
return {}; return {};
Utf16View utf16_string_view { string };
auto rx = regexp_create(global_object, regexp, js_string(vm, "g")); auto rx = regexp_create(global_object, regexp, js_string(vm, "g"));
if (!rx) if (!rx)
return {}; return {};
return rx->invoke(*vm.well_known_symbol_match_all(), js_string(vm, s)); return rx->invoke(*vm.well_known_symbol_match_all(), js_string(vm, utf16_string_view));
} }
// 22.1.3.17 String.prototype.replace ( searchValue, replaceValue ), https://tc39.es/ecma262/#sec-string.prototype.replace // 22.1.3.17 String.prototype.replace ( searchValue, replaceValue ), https://tc39.es/ecma262/#sec-string.prototype.replace

View file

@ -45,3 +45,13 @@ test("override exec with non-function", () => {
re.exec = 3; re.exec = 3;
expect("test".match(re)).not.toBeNull(); expect("test".match(re)).not.toBeNull();
}); });
// Each 😀 is stored as two UTF-16 code units (\ud83d followed by \ude00), so a
// pattern consisting of a single surrogate is expected to match half of the character.
test("UTF-16", () => {
    const single = "😀";
    const doubled = "😀😀";
    const high = "\ud83d";
    const low = "\ude00";

    // A pattern that does not occur in the string yields null.
    expect(single.match("foo")).toBeNull();

    // Non-global matches report only the first occurrence.
    expect(single.match(high)).toEqual([high]);
    expect(single.match(low)).toEqual([low]);
    expect(doubled.match(high)).toEqual([high]);
    expect(doubled.match(low)).toEqual([low]);

    // Global matches report one entry per occurrence.
    expect(doubled.match(/\ud83d/g)).toEqual([high, high]);
    expect(doubled.match(/\ude00/g)).toEqual([low, low]);
});

View file

@ -76,3 +76,63 @@ test("basic functionality", () => {
expect(next.value).toBeUndefined(); expect(next.value).toBeUndefined();
} }
}); });
// Each 😀 is stored as two UTF-16 code units (\ud83d followed by \ude00), so the
// expected match indices below are code-unit offsets into the subject string.
test("UTF-16", () => {
    // Steps `iterator` once per entry in `expected`, checking the matched text and its
    // index, then steps it `doneChecks` more times and checks it reports completion each time.
    const expectMatches = (iterator, expected, doneChecks) => {
        expected.forEach(match => {
            const step = iterator.next();
            expect(step.done).toBeFalse();
            expect(step.value).toEqual([match.text]);
            expect(step.value.index).toBe(match.index);
        });

        for (let i = 0; i < doneChecks; ++i) {
            const step = iterator.next();
            expect(step.done).toBeTrue();
            expect(step.value).toBeUndefined();
        }
    };

    // No match at all: the iterator is done immediately and stays done.
    expectMatches("😀".matchAll("foo"), [], 2);

    // High surrogate of a single character.
    expectMatches("😀".matchAll("\ud83d"), [{ text: "\ud83d", index: 0 }], 1);

    // High surrogates of two consecutive characters sit at offsets 0 and 2.
    expectMatches(
        "😀😀".matchAll("\ud83d"),
        [
            { text: "\ud83d", index: 0 },
            { text: "\ud83d", index: 2 },
        ],
        1
    );

    // Low surrogates of two consecutive characters sit at offsets 1 and 3.
    expectMatches(
        "😀😀".matchAll("\ude00"),
        [
            { text: "\ude00", index: 1 },
            { text: "\ude00", index: 3 },
        ],
        1
    );
});