mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 18:17:44 +00:00
LibJS: Implement RegExp.prototype [ @@match ] with UTF-16 code units
This commit is contained in:
parent
b1ea9c20b0
commit
2c023157e9
6 changed files with 103 additions and 16 deletions
|
@ -112,7 +112,7 @@ size_t advance_string_index(String const& string, size_t index, bool unicode)
|
|||
return advance_string_index(utf16_string_view, index, unicode);
|
||||
}
|
||||
|
||||
static void increment_last_index(GlobalObject& global_object, Object& regexp_object, String const& string, bool unicode)
|
||||
static void increment_last_index(GlobalObject& global_object, Object& regexp_object, Utf16View const& string, bool unicode)
|
||||
{
|
||||
auto& vm = global_object.vm();
|
||||
|
||||
|
@ -127,6 +127,14 @@ static void increment_last_index(GlobalObject& global_object, Object& regexp_obj
|
|||
regexp_object.set(vm.names.lastIndex, Value(last_index), Object::ShouldThrowExceptions::Yes);
|
||||
}
|
||||
|
||||
// Convenience overload of increment_last_index() for callers that hold a String:
// converts the string with AK::utf8_to_utf16() and forwards to the Utf16View overload.
static void increment_last_index(GlobalObject& global_object, Object& regexp_object, String const& string, bool unicode)
|
||||
{
|
||||
// Converted code units are kept in a local so the view below has storage to wrap.
auto utf16_string = AK::utf8_to_utf16(string);
|
||||
Utf16View utf16_string_view { utf16_string };
|
||||
|
||||
// Delegate to the Utf16View overload, passing the remaining arguments through unchanged.
return increment_last_index(global_object, regexp_object, utf16_string_view, unicode);
|
||||
}
|
||||
|
||||
// 1.1.2.1 Match Records, https://tc39.es/proposal-regexp-match-indices/#sec-match-records
|
||||
struct Match {
|
||||
static Match create(regex::Match const& match)
|
||||
|
@ -485,9 +493,11 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
|
|||
auto* regexp_object = this_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
auto s = vm.argument(0).to_string(global_object);
|
||||
|
||||
auto string = vm.argument(0).to_utf16_string(global_object);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
Utf16View string_view { string };
|
||||
|
||||
auto global_value = regexp_object->get(vm.names.global);
|
||||
if (vm.exception())
|
||||
|
@ -495,7 +505,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
|
|||
bool global = global_value.to_boolean();
|
||||
|
||||
if (!global) {
|
||||
auto result = regexp_exec(global_object, *regexp_object, s);
|
||||
auto result = regexp_exec(global_object, *regexp_object, string_view);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
return result;
|
||||
|
@ -517,7 +527,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
|
|||
size_t n = 0;
|
||||
|
||||
while (true) {
|
||||
auto result = regexp_exec(global_object, *regexp_object, s);
|
||||
auto result = regexp_exec(global_object, *regexp_object, string_view);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
|
||||
|
@ -542,7 +552,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
|
|||
return {};
|
||||
|
||||
if (match_str.is_empty()) {
|
||||
increment_last_index(global_object, *regexp_object, s, unicode);
|
||||
increment_last_index(global_object, *regexp_object, string_view, unicode);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
}
|
||||
|
@ -558,7 +568,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match_all)
|
|||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
auto string = vm.argument(0).to_string(global_object);
|
||||
auto string = vm.argument(0).to_utf16_string(global_object);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
|
||||
|
|
|
@ -10,12 +10,12 @@
|
|||
namespace JS {
|
||||
|
||||
// 22.2.7.1 CreateRegExpStringIterator ( R, S, global, fullUnicode ), https://tc39.es/ecma262/#sec-createregexpstringiterator
|
||||
RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, String string, bool global, bool unicode)
|
||||
RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, Vector<u16> string, bool global, bool unicode)
|
||||
{
|
||||
return global_object.heap().allocate<RegExpStringIterator>(global_object, *global_object.regexp_string_iterator_prototype(), regexp_object, move(string), global, unicode);
|
||||
}
|
||||
|
||||
RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, String string, bool global, bool unicode)
|
||||
RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode)
|
||||
: Object(prototype)
|
||||
, m_regexp_object(regexp_object)
|
||||
, m_string(move(string))
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Utf16View.h>
|
||||
#include <LibJS/Runtime/Object.h>
|
||||
|
||||
namespace JS {
|
||||
|
@ -14,13 +15,13 @@ class RegExpStringIterator final : public Object {
|
|||
JS_OBJECT(RegExpStringIterator, Object);
|
||||
|
||||
public:
|
||||
static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, String string, bool global, bool unicode);
|
||||
static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, Vector<u16> string, bool global, bool unicode);
|
||||
|
||||
explicit RegExpStringIterator(Object& prototype, Object& regexp_object, String string, bool global, bool unicode);
|
||||
explicit RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode);
|
||||
virtual ~RegExpStringIterator() override = default;
|
||||
|
||||
Object& regexp_object() { return m_regexp_object; }
|
||||
String const& string() const { return m_string; }
|
||||
Utf16View string() const { return Utf16View { m_string }; }
|
||||
bool global() const { return m_global; }
|
||||
bool unicode() const { return m_unicode; }
|
||||
|
||||
|
@ -31,7 +32,7 @@ private:
|
|||
virtual void visit_edges(Cell::Visitor&) override;
|
||||
|
||||
Object& m_regexp_object;
|
||||
String m_string;
|
||||
Vector<u16> m_string;
|
||||
bool m_global { false };
|
||||
bool m_unicode { false };
|
||||
bool m_done { false };
|
||||
|
|
|
@ -839,13 +839,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match)
|
|||
if (vm.exception())
|
||||
return {};
|
||||
}
|
||||
auto s = this_object.to_string(global_object);
|
||||
|
||||
auto string = this_object.to_utf16_string(global_object);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
Utf16View utf16_string_view { string };
|
||||
|
||||
auto rx = regexp_create(global_object, regexp, js_undefined());
|
||||
if (!rx)
|
||||
return {};
|
||||
return rx->invoke(*vm.well_known_symbol_match(), js_string(vm, s));
|
||||
return rx->invoke(*vm.well_known_symbol_match(), js_string(vm, utf16_string_view));
|
||||
}
|
||||
|
||||
// 22.1.3.12 String.prototype.matchAll ( regexp ), https://tc39.es/ecma262/#sec-string.prototype.matchall
|
||||
|
@ -879,13 +882,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match_all)
|
|||
if (vm.exception())
|
||||
return {};
|
||||
}
|
||||
auto s = this_object.to_string(global_object);
|
||||
|
||||
auto string = this_object.to_utf16_string(global_object);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
Utf16View utf16_string_view { string };
|
||||
|
||||
auto rx = regexp_create(global_object, regexp, js_string(vm, "g"));
|
||||
if (!rx)
|
||||
return {};
|
||||
return rx->invoke(*vm.well_known_symbol_match_all(), js_string(vm, s));
|
||||
return rx->invoke(*vm.well_known_symbol_match_all(), js_string(vm, utf16_string_view));
|
||||
}
|
||||
|
||||
// 22.1.3.17 String.prototype.replace ( searchValue, replaceValue ), https://tc39.es/ecma262/#sec-string.prototype.replace
|
||||
|
|
|
@ -45,3 +45,13 @@ test("override exec with non-function", () => {
|
|||
re.exec = 3;
|
||||
expect("test".match(re)).not.toBeNull();
|
||||
});
|
||||
|
||||
// Checks that String.prototype.match operates on UTF-16 code units.
// "😀" (U+1F600) is encoded in UTF-16 as the surrogate pair \ud83d \ude00, so each
// half of the pair must be matchable on its own.
test("UTF-16", () => {
|
||||
// A pattern that does not occur in the string yields null.
expect("😀".match("foo")).toBeNull();
|
||||
// String patterns (non-global): only the first occurrence of the lone surrogate is returned.
expect("😀".match("\ud83d")).toEqual(["\ud83d"]);
|
||||
expect("😀".match("\ude00")).toEqual(["\ude00"]);
|
||||
expect("😀😀".match("\ud83d")).toEqual(["\ud83d"]);
|
||||
expect("😀😀".match("\ude00")).toEqual(["\ude00"]);
|
||||
// Global regex literals: every occurrence of the lone surrogate is returned.
expect("😀😀".match(/\ud83d/g)).toEqual(["\ud83d", "\ud83d"]);
|
||||
expect("😀😀".match(/\ude00/g)).toEqual(["\ude00", "\ude00"]);
|
||||
});
|
||||
|
|
|
@ -76,3 +76,63 @@ test("basic functionality", () => {
|
|||
expect(next.value).toBeUndefined();
|
||||
}
|
||||
});
|
||||
|
||||
// Checks that String.prototype.matchAll iterates matches by UTF-16 code unit.
// "😀" (U+1F600) is the surrogate pair \ud83d \ude00, so in "😀😀" the high
// surrogates sit at code-unit indices 0 and 2 and the low surrogates at 1 and 3.
test("UTF-16", () => {
|
||||
{
|
||||
// No match: the iterator is done immediately and stays done on later calls.
var iterator = "😀".matchAll("foo");
|
||||
|
||||
var next = iterator.next();
|
||||
expect(next.done).toBeTrue();
|
||||
expect(next.value).toBeUndefined();
|
||||
|
||||
next = iterator.next();
|
||||
expect(next.done).toBeTrue();
|
||||
expect(next.value).toBeUndefined();
|
||||
}
|
||||
{
|
||||
// Single emoji: the high surrogate matches once, at index 0.
var iterator = "😀".matchAll("\ud83d");
|
||||
|
||||
var next = iterator.next();
|
||||
expect(next.done).toBeFalse();
|
||||
expect(next.value).toEqual(["\ud83d"]);
|
||||
expect(next.value.index).toBe(0);
|
||||
|
||||
next = iterator.next();
|
||||
expect(next.done).toBeTrue();
|
||||
expect(next.value).toBeUndefined();
|
||||
}
|
||||
{
|
||||
// Two emoji: the high surrogate matches at indices 0 and 2.
var iterator = "😀😀".matchAll("\ud83d");
|
||||
|
||||
var next = iterator.next();
|
||||
expect(next.done).toBeFalse();
|
||||
expect(next.value).toEqual(["\ud83d"]);
|
||||
expect(next.value.index).toBe(0);
|
||||
|
||||
next = iterator.next();
|
||||
expect(next.done).toBeFalse();
|
||||
expect(next.value).toEqual(["\ud83d"]);
|
||||
expect(next.value.index).toBe(2);
|
||||
|
||||
next = iterator.next();
|
||||
expect(next.done).toBeTrue();
|
||||
expect(next.value).toBeUndefined();
|
||||
}
|
||||
{
|
||||
// Two emoji: the low surrogate matches at indices 1 and 3.
var iterator = "😀😀".matchAll("\ude00");
|
||||
|
||||
var next = iterator.next();
|
||||
expect(next.done).toBeFalse();
|
||||
expect(next.value).toEqual(["\ude00"]);
|
||||
expect(next.value.index).toBe(1);
|
||||
|
||||
next = iterator.next();
|
||||
expect(next.done).toBeFalse();
|
||||
expect(next.value).toEqual(["\ude00"]);
|
||||
expect(next.value.index).toBe(3);
|
||||
|
||||
next = iterator.next();
|
||||
expect(next.done).toBeTrue();
|
||||
expect(next.value).toBeUndefined();
|
||||
}
|
||||
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue