1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 15:27:35 +00:00

LibJS: Hook up Regex<ECMA262> to RegExpObject and implement `test()'

This makes RegExpObject compile and store a Regex<ECMA262>, adds
all flag-related properties, and implements `RegExpPrototype.test()`
(complete with 'lastIndex' support) :^)
Note that `test()` is implemented only in terms of the builtin `exec()`;
a user-provided `exec` property is not consulted yet.
This commit is contained in:
AnotherTest 2020-11-19 01:50:00 +03:30 committed by Andreas Kling
parent 75081b2bdd
commit 8ba273a2f3
13 changed files with 396 additions and 12 deletions

View file

@ -79,4 +79,4 @@ set(SOURCES
)
serenity_lib(LibJS js)
target_link_libraries(LibJS LibM LibCore LibCrypto)
target_link_libraries(LibJS LibM LibCore LibCrypto LibRegex)

View file

@ -86,6 +86,7 @@ namespace JS {
P(deleteProperty) \
P(description) \
P(done) \
P(dotAll) \
P(entries) \
P(enumerable) \
P(error) \
@ -96,6 +97,7 @@ namespace JS {
P(filter) \
P(find) \
P(findIndex) \
P(flags) \
P(floor) \
P(forEach) \
P(from) \
@ -122,9 +124,11 @@ namespace JS {
P(getUTCMinutes) \
P(getUTCMonth) \
P(getUTCSeconds) \
P(global) \
P(globalThis) \
P(has) \
P(hasOwnProperty) \
P(ignoreCase) \
P(includes) \
P(indexOf) \
P(info) \
@ -138,6 +142,7 @@ namespace JS {
P(join) \
P(keyFor) \
P(keys) \
P(lastIndex) \
P(lastIndexOf) \
P(length) \
P(log) \
@ -146,6 +151,7 @@ namespace JS {
P(max) \
P(message) \
P(min) \
P(multiline) \
P(name) \
P(next) \
P(now) \
@ -174,12 +180,15 @@ namespace JS {
P(sin) \
P(slice) \
P(some) \
P(source) \
P(splice) \
P(sqrt) \
P(startsWith) \
P(stringify) \
P(sticky) \
P(substring) \
P(tan) \
P(test) \
P(toDateString) \
P(toISOString) \
P(toJSON) \
@ -196,6 +205,7 @@ namespace JS {
P(trimStart) \
P(trunc) \
P(undefined) \
P(unicode) \
P(unshift) \
P(value) \
P(valueOf) \

View file

@ -146,6 +146,9 @@
M(ReflectBadArgumentsList, "Arguments list must be an object") \
M(ReflectBadNewTarget, "Optional third argument of Reflect.construct() must be a constructor") \
M(ReflectBadDescriptorArgument, "Descriptor argument is not an object") \
M(RegExpCompileError, "RegExp compile error: '{}'") \
M(RegExpObjectBadFlag, "Invalid RegExp flag '{}'") \
M(RegExpObjectRepeatedFlag, "Repeated RegExp flag '{}'") \
M(StringRawCannotConvert, "Cannot convert property 'raw' to object from {}") \
M(StringRepeatCountMustBe, "repeat count must be a {} number") \
M(ThisHasNotBeenInitialized, "|this| has not been initialized") \

View file

@ -24,6 +24,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/Function.h>
#include <AK/StringBuilder.h>
#include <LibJS/Heap/Heap.h>
#include <LibJS/Runtime/GlobalObject.h>
@ -33,6 +34,73 @@
namespace JS {
// Translates a JS flag string (e.g. "gi") into the two option sets stored by RegExpObject:
//  - declared_flags:  exactly the flags the script wrote.
//  - effective_flags: the flags handed to the regex engine.
// A repeated or unknown flag throws a SyntaxError on the VM and stops parsing immediately;
// the options accumulated so far are returned and the exception is left pending.
static Flags options_from(const String& flags, VM& vm, GlobalObject& global_object)
{
    bool g = false, i = false, m = false, s = false, u = false, y = false;
    Flags options {
        { static_cast<regex::ECMAScriptFlags>(regex::AllFlags::Global) }, // JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
        {},
    };

    // Marks a flag letter as seen. On a repeat, throws and returns false so the caller can bail out
    // (the same way the unknown-flag path does) instead of parsing on with an exception pending.
    auto first_occurrence = [&](bool& seen, auto flag) {
        if (seen) {
            vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, flag);
            return false;
        }
        seen = true;
        return true;
    };

    // Turns on a flag in both the declared and the effective set.
    auto enable = [&](regex::ECMAScriptFlags flag) {
        options.effective_flags |= flag;
        options.declared_flags |= flag;
    };

    for (auto ch : flags) {
        switch (ch) {
        case 'g':
            if (!first_occurrence(g, ch))
                return options;
            enable(regex::ECMAScriptFlags::Global);
            break;
        case 'i':
            if (!first_occurrence(i, ch))
                return options;
            enable(regex::ECMAScriptFlags::Insensitive);
            break;
        case 'm':
            if (!first_occurrence(m, ch))
                return options;
            enable(regex::ECMAScriptFlags::Multiline);
            break;
        case 's':
            if (!first_occurrence(s, ch))
                return options;
            enable(regex::ECMAScriptFlags::SingleLine);
            break;
        case 'u':
            if (!first_occurrence(u, ch))
                return options;
            enable(regex::ECMAScriptFlags::Unicode);
            break;
        case 'y':
            if (!first_occurrence(y, ch))
                return options;
            enable(regex::ECMAScriptFlags::Sticky);
            break;
        default:
            vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectBadFlag, ch);
            return options;
        }
    }

    if (y) {
        // Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
        // "What's the difference between sticky and global, then", that's simple.
        // all the other flags imply 'global', and the "global" flag implies 'stateful';
        // however, the "sticky" flag does *not* imply 'global', only 'stateful'.
        // This is applied after the loop so that "gy" and "yg" yield the same effective flags;
        // doing it inside the loop let a later 'g' switch 'global' back on.
        options.effective_flags.reset_flag(regex::ECMAScriptFlags::Global);
        options.effective_flags |= static_cast<regex::ECMAScriptFlags>(regex::AllFlags::Internal_Stateful);
    }

    return options;
}
RegExpObject* RegExpObject::create(GlobalObject& global_object, String pattern, String flags)
{
return global_object.heap().allocate<RegExpObject>(global_object, pattern, flags, *global_object.regexp_prototype());
@ -42,11 +110,61 @@ RegExpObject::RegExpObject(String pattern, String flags, Object& prototype)
: Object(prototype)
, m_pattern(pattern)
, m_flags(flags)
, m_active_flags(options_from(m_flags, this->vm(), this->global_object()))
, m_regex(pattern, m_active_flags.effective_flags)
{
if (m_regex.parser_result.error != regex::Error::NoError) {
vm().throw_exception<SyntaxError>(global_object(), ErrorType::RegExpCompileError, m_regex.error_string());
}
}
// Runs the base Object initialization and then installs the writable 'lastIndex'
// accessor, backed by the last_index / set_last_index native accessors.
void RegExpObject::initialize(GlobalObject& global_object)
{
    Object::initialize(global_object);
    define_native_property(vm().names.lastIndex, last_index, set_last_index, Attribute::Writable);
}
// Nothing to release by hand; the members clean up after themselves.
RegExpObject::~RegExpObject() = default;
// Resolves the current |this| value to a RegExpObject.
// Returns nullptr when |this| cannot be converted to an object, or when the resulting
// object is not a RegExpObject (in which case a TypeError is thrown on the VM).
static RegExpObject* regexp_object_from(VM& vm, GlobalObject& global_object)
{
    auto* receiver = vm.this_value(global_object).to_object(global_object);
    if (receiver && receiver->is_regexp_object())
        return static_cast<RegExpObject*>(receiver);

    // Only a non-regexp object gets the TypeError here; a null receiver is passed through as-is
    // (presumably to_object() has already reported that failure).
    if (receiver)
        vm.throw_exception<TypeError>(global_object, ErrorType::NotA, "RegExp");
    return nullptr;
}
// Getter for 'lastIndex': reports the start offset stored in the compiled regex.
JS_DEFINE_NATIVE_GETTER(RegExpObject::last_index)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    const auto offset = static_cast<unsigned>(self->regex().start_offset);
    return Value(offset);
}
// Setter for 'lastIndex': converts the assigned value to a 32-bit integer, clamps
// negative values to zero and stores the result as the compiled regex's start offset.
JS_DEFINE_NATIVE_SETTER(RegExpObject::set_last_index)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return;

    const auto requested = value.to_i32(global_object);
    if (vm.exception())
        return;

    self->regex().start_offset = requested < 0 ? 0 : requested;
}
}

View file

@ -28,6 +28,12 @@
#include <LibJS/AST.h>
#include <LibJS/Runtime/Object.h>
#include <LibRegex/Regex.h>
// The two views of a RegExp's flags, produced together by options_from().
// Member order matters: options_from() initializes this aggregate positionally.
struct Flags {
// Flags passed to the regex engine when the pattern is compiled.
regex::RegexOptions<ECMAScriptFlags> effective_flags;
// Flags as written in the flag string; the RegExp.prototype flag getters read these.
regex::RegexOptions<ECMAScriptFlags> declared_flags;
};
namespace JS {
@ -38,16 +44,25 @@ public:
// Allocates a RegExpObject on the global object's heap, using the global RegExp prototype.
static RegExpObject* create(GlobalObject&, String pattern, String flags);
// Parses `flags` and compiles `pattern`; a compile error is reported as a SyntaxError on the VM.
RegExpObject(String pattern, String flags, Object& prototype);
// Installs the per-instance 'lastIndex' property.
virtual void initialize(GlobalObject&) override;
virtual ~RegExpObject() override;
// The pattern source text, as given at construction.
const String& pattern() const { return m_pattern; }
// The raw flag string, as given at construction.
const String& flags() const { return m_flags; }
// The flags exactly as declared in the flag string. Const-qualified so it is also
// callable on a const RegExpObject; it only reads state.
const regex::RegexOptions<ECMAScriptFlags>& declared_options() const { return m_active_flags.declared_flags; }
// The compiled matcher. One const accessor serves const and non-const objects alike;
// a separate non-const overload returning the very same const reference adds nothing.
const Regex<ECMA262>& regex() const { return m_regex; }
private:
virtual bool is_regexp_object() const override { return true; }
// Native accessors backing the 'lastIndex' property.
JS_DECLARE_NATIVE_GETTER(last_index);
JS_DECLARE_NATIVE_SETTER(set_last_index);
// Declaration order is significant: the constructor initializes m_active_flags from
// m_flags and m_regex from m_active_flags, and members are initialized in this order.
String m_pattern;
String m_flags;
Flags m_active_flags;
Regex<ECMA262> m_regex;
};
}

View file

@ -43,6 +43,17 @@ void RegExpPrototype::initialize(GlobalObject& global_object)
Object::initialize(global_object);
u8 attr = Attribute::Writable | Attribute::Configurable;
define_native_function(vm.names.toString, to_string, 0, attr);
define_native_function(vm.names.test, test, 1, attr);
u8 readable_attr = Attribute::Configurable;
define_native_property(vm.names.dotAll, dot_all, nullptr, readable_attr);
define_native_property(vm.names.flags, flags, nullptr, readable_attr);
define_native_property(vm.names.global, global, nullptr, readable_attr);
define_native_property(vm.names.ignoreCase, ignore_case, nullptr, readable_attr);
define_native_property(vm.names.multiline, multiline, nullptr, readable_attr);
define_native_property(vm.names.source, source, nullptr, readable_attr);
define_native_property(vm.names.sticky, sticky, nullptr, readable_attr);
define_native_property(vm.names.unicode, unicode, nullptr, readable_attr);
}
RegExpPrototype::~RegExpPrototype()
@ -61,6 +72,124 @@ static RegExpObject* regexp_object_from(VM& vm, GlobalObject& global_object)
return static_cast<RegExpObject*>(this_object);
}
// Getter for 'dotAll': true when the 's' flag was declared.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::dot_all)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    const auto& declared = self->declared_options();
    return Value(declared.has_flag_set(ECMAScriptFlags::SingleLine));
}
// Getter for 'flags': rebuilds the flag string from the declared flags, always in the
// fixed order "gimsuy" regardless of how the flags were originally written.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::flags)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    struct FlagLetter {
        ECMAScriptFlags flag;
        char letter;
    };
    // Listed in output order.
    static constexpr FlagLetter flag_letters[] = {
        { ECMAScriptFlags::Global, 'g' },
        { ECMAScriptFlags::Insensitive, 'i' },
        { ECMAScriptFlags::Multiline, 'm' },
        { ECMAScriptFlags::SingleLine, 's' },
        { ECMAScriptFlags::Unicode, 'u' },
        { ECMAScriptFlags::Sticky, 'y' },
    };

    const auto& declared = self->declared_options();
    StringBuilder builder(8);
    for (const auto& entry : flag_letters) {
        if (declared.has_flag_set(entry.flag))
            builder.append(entry.letter);
    }
    return js_string(vm, builder.to_string());
}
// Getter for 'global': true when the 'g' flag was declared.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::global)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    // Note that this "Global" is actually "Global | Stateful"
    const auto& declared = self->declared_options();
    return Value(declared.has_flag_set(ECMAScriptFlags::Global));
}
// Getter for 'ignoreCase': true when the 'i' flag was declared.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::ignore_case)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    const auto& declared = self->declared_options();
    return Value(declared.has_flag_set(ECMAScriptFlags::Insensitive));
}
// Getter for 'multiline': true when the 'm' flag was declared.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::multiline)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    const auto& declared = self->declared_options();
    return Value(declared.has_flag_set(ECMAScriptFlags::Multiline));
}
// Getter for 'source': the pattern text the RegExp was created with, as a JS string.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::source)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    const auto& pattern_text = self->pattern();
    return js_string(vm, pattern_text);
}
// Getter for 'sticky': true when the 'y' flag was declared.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::sticky)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    const auto& declared = self->declared_options();
    return Value(declared.has_flag_set(ECMAScriptFlags::Sticky));
}
// Getter for 'unicode': true when the 'u' flag was declared.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::unicode)
{
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    const auto& declared = self->declared_options();
    return Value(declared.has_flag_set(ECMAScriptFlags::Unicode));
}
// Runs `re` against `subject` and returns the engine's result.
// When the match fails and the regex's options have 'Global' set, the stored start
// offset (what scripts see as 'lastIndex') is rewound to zero.
RegexResult RegExpPrototype::do_match(const Regex<ECMA262>& re, const StringView& subject)
{
    auto match_result = re.match(subject);
    if (match_result.success)
        return match_result;

    // The 'lastIndex' property is reset on failing tests (if 'global')
    if (re.options().has_flag_set(ECMAScriptFlags::Global))
        re.start_offset = 0;
    return match_result;
}
// RegExp.prototype.test(string): converts the first argument to a string, matches the
// receiver against it and returns whether the match succeeded.
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::test)
{
    // FIXME: This should try using dynamic properties for 'exec' first,
    //        before falling back to builtin_exec.
    auto* self = regexp_object_from(vm, global_object);
    if (self == nullptr)
        return {};

    auto subject = vm.argument(0).to_string(global_object);
    if (vm.exception())
        return {};

    const auto& compiled = self->regex();
    const auto stateful = static_cast<ECMAScriptFlags>(regex::AllFlags::Internal_Stateful);
    // RegExps without "global" and "sticky" always start at offset 0.
    if (!compiled.options().has_flag_set(stateful))
        compiled.start_offset = 0;

    return Value(do_match(compiled, subject).success);
}
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::to_string)
{
auto* regexp_object = regexp_object_from(vm, global_object);

View file

@ -39,6 +39,18 @@ public:
virtual ~RegExpPrototype() override;
private:
static RegexResult do_match(const Regex<ECMA262>&, const StringView&);
JS_DECLARE_NATIVE_GETTER(dot_all);
JS_DECLARE_NATIVE_GETTER(flags);
JS_DECLARE_NATIVE_GETTER(global);
JS_DECLARE_NATIVE_GETTER(ignore_case);
JS_DECLARE_NATIVE_GETTER(multiline);
JS_DECLARE_NATIVE_GETTER(source);
JS_DECLARE_NATIVE_GETTER(sticky);
JS_DECLARE_NATIVE_GETTER(unicode);
JS_DECLARE_NATIVE_FUNCTION(test);
JS_DECLARE_NATIVE_FUNCTION(to_string);
};

View file

@ -0,0 +1,58 @@
// RegExp.prototype.test is registered with a declared parameter count of one.
test("basic functionality", () => {
    const testFunction = RegExp.prototype.test;
    expect(testFunction).toHaveLength(1);
});
// Without 'g' or 'y' every call starts from offset 0, so repeating the call gives the same answer.
test("simple test", () => {
    const re = /test/;
    [true, true].forEach(expected => {
        expect(re.test("test")).toBe(expected);
    });
});
// With 'g' the regexp is stateful: two matches are found in "testtest", the third call
// fails and rewinds lastIndex, after which the cycle starts over.
test("simple global test", () => {
    const re = /test/g;
    const expectedResults = [true, true, false, true, true];
    expectedResults.forEach(expected => {
        expect(re.test("testtest")).toBe(expected);
    });
});
// Starting from lastIndex = 2 only the second "test" is reachable on the first pass;
// the following failure rewinds lastIndex and both occurrences are found again.
test("global test with offset lastIndex", () => {
    const re = /test/g;
    re.lastIndex = 2;
    const expectedResults = [true, false, true, true, false];
    expectedResults.forEach(expected => {
        expect(re.test("testtest")).toBe(expected);
    });
});
// With 'y' the match must begin exactly at lastIndex: it succeeds once at offset 2
// and every later call on the same subject fails.
test("sticky test with offset lastIndex", () => {
    const re = /test/y;
    re.lastIndex = 2;
    const expectedResults = [true, false, false];
    expectedResults.forEach(expected => {
        expect(re.test("aatest")).toBe(expected);
    });
});
test("flag and options", () => {
expect(/foo/gi.flags).toBe("gi");
expect(/foo/mu.flags).toBe("mu");
expect(/foo/gimsuy.flags).toBe("gimsuy");
let re = /foo/gim;
expect(re.dotAll).toBe(false);
expect(re.global).toBe(true);
expect(re.ignoreCase).toBe(true);
expect(re.multiline).toBe(true);
expect(re.sticky).toBe(false);
expect(re.unicode).toBe(false);
expect(() => {
/foo/gg;
}).toThrowWithMessage(SyntaxError, "Repeated RegExp flag 'g'");
expect(() => {
/foo/x;
}).toThrowWithMessage(SyntaxError, "Invalid RegExp flag 'x'");
});