1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 15:47:44 +00:00

LibJS: Implement RegExp.prototype [ @@match ] with UTF-16 code units

This commit is contained in:
Timothy Flynn 2021-07-22 10:19:27 -04:00 committed by Linus Groh
parent b1ea9c20b0
commit 2c023157e9
6 changed files with 103 additions and 16 deletions

View file

@ -112,7 +112,7 @@ size_t advance_string_index(String const& string, size_t index, bool unicode)
return advance_string_index(utf16_string_view, index, unicode);
}
static void increment_last_index(GlobalObject& global_object, Object& regexp_object, String const& string, bool unicode)
static void increment_last_index(GlobalObject& global_object, Object& regexp_object, Utf16View const& string, bool unicode)
{
auto& vm = global_object.vm();
@ -127,6 +127,14 @@ static void increment_last_index(GlobalObject& global_object, Object& regexp_obj
regexp_object.set(vm.names.lastIndex, Value(last_index), Object::ShouldThrowExceptions::Yes);
}
static void increment_last_index(GlobalObject& global_object, Object& regexp_object, String const& string, bool unicode)
{
    // Overload for callers that hold a String: transcode it to UTF-16 code units and
    // delegate to the Utf16View overload, which does the actual lastIndex update.
    auto code_units = AK::utf8_to_utf16(string);

    // NOTE(review): the view presumably refers to `code_units` rather than copying it, so the
    // buffer is kept as a named local that outlives the delegated call.
    Utf16View code_unit_view { code_units };
    increment_last_index(global_object, regexp_object, code_unit_view, unicode);
}
// 1.1.2.1 Match Records, https://tc39.es/proposal-regexp-match-indices/#sec-match-records
struct Match {
static Match create(regex::Match const& match)
@ -485,9 +493,11 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
auto* regexp_object = this_object_from(vm, global_object);
if (!regexp_object)
return {};
auto s = vm.argument(0).to_string(global_object);
auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View string_view { string };
auto global_value = regexp_object->get(vm.names.global);
if (vm.exception())
@ -495,7 +505,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
bool global = global_value.to_boolean();
if (!global) {
auto result = regexp_exec(global_object, *regexp_object, s);
auto result = regexp_exec(global_object, *regexp_object, string_view);
if (vm.exception())
return {};
return result;
@ -517,7 +527,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
size_t n = 0;
while (true) {
auto result = regexp_exec(global_object, *regexp_object, s);
auto result = regexp_exec(global_object, *regexp_object, string_view);
if (vm.exception())
return {};
@ -542,7 +552,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
return {};
if (match_str.is_empty()) {
increment_last_index(global_object, *regexp_object, s, unicode);
increment_last_index(global_object, *regexp_object, string_view, unicode);
if (vm.exception())
return {};
}
@ -558,7 +568,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match_all)
if (!regexp_object)
return {};
auto string = vm.argument(0).to_string(global_object);
auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception())
return {};

View file

@ -10,12 +10,12 @@
namespace JS {
// 22.2.7.1 CreateRegExpStringIterator ( R, S, global, fullUnicode ), https://tc39.es/ecma262/#sec-createregexpstringiterator
RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, String string, bool global, bool unicode)
RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, Vector<u16> string, bool global, bool unicode)
{
    // The iterator is heap-allocated with the global object's RegExpStringIterator prototype.
    auto& iterator_prototype = *global_object.regexp_string_iterator_prototype();

    // `string` (UTF-16 code units) is taken by value and moved into the new iterator.
    return global_object.heap().allocate<RegExpStringIterator>(
        global_object,
        iterator_prototype,
        regexp_object,
        move(string),
        global,
        unicode);
}
RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, String string, bool global, bool unicode)
RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode)
: Object(prototype)
, m_regexp_object(regexp_object)
, m_string(move(string))

View file

@ -6,6 +6,7 @@
#pragma once
#include <AK/Utf16View.h>
#include <LibJS/Runtime/Object.h>
namespace JS {
@ -14,13 +15,13 @@ class RegExpStringIterator final : public Object {
JS_OBJECT(RegExpStringIterator, Object);
public:
static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, String string, bool global, bool unicode);
static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, Vector<u16> string, bool global, bool unicode);
explicit RegExpStringIterator(Object& prototype, Object& regexp_object, String string, bool global, bool unicode);
explicit RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode);
virtual ~RegExpStringIterator() override = default;
Object& regexp_object() { return m_regexp_object; }
String const& string() const { return m_string; }
Utf16View string() const { return Utf16View { m_string }; }
bool global() const { return m_global; }
bool unicode() const { return m_unicode; }
@ -31,7 +32,7 @@ private:
virtual void visit_edges(Cell::Visitor&) override;
Object& m_regexp_object;
String m_string;
Vector<u16> m_string;
bool m_global { false };
bool m_unicode { false };
bool m_done { false };

View file

@ -839,13 +839,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match)
if (vm.exception())
return {};
}
auto s = this_object.to_string(global_object);
auto string = this_object.to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto rx = regexp_create(global_object, regexp, js_undefined());
if (!rx)
return {};
return rx->invoke(*vm.well_known_symbol_match(), js_string(vm, s));
return rx->invoke(*vm.well_known_symbol_match(), js_string(vm, utf16_string_view));
}
// 22.1.3.12 String.prototype.matchAll ( regexp ), https://tc39.es/ecma262/#sec-string.prototype.matchall
@ -879,13 +882,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match_all)
if (vm.exception())
return {};
}
auto s = this_object.to_string(global_object);
auto string = this_object.to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto rx = regexp_create(global_object, regexp, js_string(vm, "g"));
if (!rx)
return {};
return rx->invoke(*vm.well_known_symbol_match_all(), js_string(vm, s));
return rx->invoke(*vm.well_known_symbol_match_all(), js_string(vm, utf16_string_view));
}
// 22.1.3.17 String.prototype.replace ( searchValue, replaceValue ), https://tc39.es/ecma262/#sec-string.prototype.replace

View file

@ -45,3 +45,13 @@ test("override exec with non-function", () => {
re.exec = 3;
expect("test".match(re)).not.toBeNull();
});
// "😀" is U+1F600, which UTF-16 encodes as the surrogate pair \ud83d (high) \ude00 (low).
// These cases check that String.prototype.match operates on individual UTF-16 code units,
// so a lone surrogate can be matched inside a string made of full surrogate pairs.
test("UTF-16", () => {
// A pattern that does not occur in the string yields null.
expect("😀".match("foo")).toBeNull();
// String patterns: a non-global match returns only the first occurrence of the code unit.
expect("😀".match("\ud83d")).toEqual(["\ud83d"]);
expect("😀".match("\ude00")).toEqual(["\ude00"]);
expect("😀😀".match("\ud83d")).toEqual(["\ud83d"]);
expect("😀😀".match("\ude00")).toEqual(["\ude00"]);
// Global regex literals: every occurrence is returned, one per emoji in the input.
expect("😀😀".match(/\ud83d/g)).toEqual(["\ud83d", "\ud83d"]);
expect("😀😀".match(/\ude00/g)).toEqual(["\ude00", "\ude00"]);
});

View file

@ -76,3 +76,63 @@ test("basic functionality", () => {
expect(next.value).toBeUndefined();
}
});
// "😀" is U+1F600, which UTF-16 encodes as the surrogate pair \ud83d (high) \ude00 (low).
// These cases check that String.prototype.matchAll iterates over matches of individual
// UTF-16 code units, and that each match's `index` is an offset counted in code units.
test("UTF-16", () => {
// Case 1: the pattern never matches, so the iterator is done immediately and stays done.
{
var iterator = "😀".matchAll("foo");
var next = iterator.next();
expect(next.done).toBeTrue();
expect(next.value).toBeUndefined();
next = iterator.next();
expect(next.done).toBeTrue();
expect(next.value).toBeUndefined();
}
// Case 2: a single emoji contains the high surrogate exactly once, at code unit 0.
{
var iterator = "😀".matchAll("\ud83d");
var next = iterator.next();
expect(next.done).toBeFalse();
expect(next.value).toEqual(["\ud83d"]);
expect(next.value.index).toBe(0);
next = iterator.next();
expect(next.done).toBeTrue();
expect(next.value).toBeUndefined();
}
// Case 3: two emoji are four code units; the high surrogates sit at code units 0 and 2.
{
var iterator = "😀😀".matchAll("\ud83d");
var next = iterator.next();
expect(next.done).toBeFalse();
expect(next.value).toEqual(["\ud83d"]);
expect(next.value.index).toBe(0);
next = iterator.next();
expect(next.done).toBeFalse();
expect(next.value).toEqual(["\ud83d"]);
expect(next.value.index).toBe(2);
next = iterator.next();
expect(next.done).toBeTrue();
expect(next.value).toBeUndefined();
}
// Case 4: in the same four-code-unit string, the low surrogates sit at code units 1 and 3.
{
var iterator = "😀😀".matchAll("\ude00");
var next = iterator.next();
expect(next.done).toBeFalse();
expect(next.value).toEqual(["\ude00"]);
expect(next.value.index).toBe(1);
next = iterator.next();
expect(next.done).toBeFalse();
expect(next.value).toEqual(["\ude00"]);
expect(next.value.index).toBe(3);
next = iterator.next();
expect(next.done).toBeTrue();
expect(next.value).toBeUndefined();
}
});