mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 22:27:35 +00:00
LibJS: Hook up Regex<ECMA262> to RegExpObject and implement `test()`
This makes RegExpObject compile and store a Regex<ECMA262>, adds all flag-related properties, and implements `RegExpPrototype.test()` (complete with 'lastIndex' support) :^) Note that `test()` is implemented only on top of the builtin `exec()`; a user-provided `exec` property is not consulted yet.
This commit is contained in:
parent
75081b2bdd
commit
8ba273a2f3
13 changed files with 396 additions and 12 deletions
|
@ -86,6 +86,7 @@ namespace JS {
|
|||
P(deleteProperty) \
|
||||
P(description) \
|
||||
P(done) \
|
||||
P(dotAll) \
|
||||
P(entries) \
|
||||
P(enumerable) \
|
||||
P(error) \
|
||||
|
@ -96,6 +97,7 @@ namespace JS {
|
|||
P(filter) \
|
||||
P(find) \
|
||||
P(findIndex) \
|
||||
P(flags) \
|
||||
P(floor) \
|
||||
P(forEach) \
|
||||
P(from) \
|
||||
|
@ -122,9 +124,11 @@ namespace JS {
|
|||
P(getUTCMinutes) \
|
||||
P(getUTCMonth) \
|
||||
P(getUTCSeconds) \
|
||||
P(global) \
|
||||
P(globalThis) \
|
||||
P(has) \
|
||||
P(hasOwnProperty) \
|
||||
P(ignoreCase) \
|
||||
P(includes) \
|
||||
P(indexOf) \
|
||||
P(info) \
|
||||
|
@ -138,6 +142,7 @@ namespace JS {
|
|||
P(join) \
|
||||
P(keyFor) \
|
||||
P(keys) \
|
||||
P(lastIndex) \
|
||||
P(lastIndexOf) \
|
||||
P(length) \
|
||||
P(log) \
|
||||
|
@ -146,6 +151,7 @@ namespace JS {
|
|||
P(max) \
|
||||
P(message) \
|
||||
P(min) \
|
||||
P(multiline) \
|
||||
P(name) \
|
||||
P(next) \
|
||||
P(now) \
|
||||
|
@ -174,12 +180,15 @@ namespace JS {
|
|||
P(sin) \
|
||||
P(slice) \
|
||||
P(some) \
|
||||
P(source) \
|
||||
P(splice) \
|
||||
P(sqrt) \
|
||||
P(startsWith) \
|
||||
P(stringify) \
|
||||
P(sticky) \
|
||||
P(substring) \
|
||||
P(tan) \
|
||||
P(test) \
|
||||
P(toDateString) \
|
||||
P(toISOString) \
|
||||
P(toJSON) \
|
||||
|
@ -196,6 +205,7 @@ namespace JS {
|
|||
P(trimStart) \
|
||||
P(trunc) \
|
||||
P(undefined) \
|
||||
P(unicode) \
|
||||
P(unshift) \
|
||||
P(value) \
|
||||
P(valueOf) \
|
||||
|
|
|
@ -146,6 +146,9 @@
|
|||
M(ReflectBadArgumentsList, "Arguments list must be an object") \
|
||||
M(ReflectBadNewTarget, "Optional third argument of Reflect.construct() must be a constructor") \
|
||||
M(ReflectBadDescriptorArgument, "Descriptor argument is not an object") \
|
||||
M(RegExpCompileError, "RegExp compile error: '{}'") \
|
||||
M(RegExpObjectBadFlag, "Invalid RegExp flag '{}'") \
|
||||
M(RegExpObjectRepeatedFlag, "Repeated RegExp flag '{}'") \
|
||||
M(StringRawCannotConvert, "Cannot convert property 'raw' to object from {}") \
|
||||
M(StringRepeatCountMustBe, "repeat count must be a {} number") \
|
||||
M(ThisHasNotBeenInitialized, "|this| has not been initialized") \
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <AK/Function.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibJS/Heap/Heap.h>
|
||||
#include <LibJS/Runtime/GlobalObject.h>
|
||||
|
@ -33,6 +34,73 @@
|
|||
|
||||
namespace JS {
|
||||
|
||||
// Translates a RegExp flag string into the option sets stored on a RegExpObject.
//
// Every recognized character (g, i, m, s, u, y) is recorded in `declared_flags` and applied to
// `effective_flags`. A repeated or unrecognized character throws a SyntaxError on `vm`. Because
// throw_exception() returns normally, parsing stops at the first offending character (so a later
// error cannot replace the first one) and the options gathered so far are returned.
static Flags options_from(const String& flags, VM& vm, GlobalObject& global_object)
{
    bool g = false, i = false, m = false, s = false, u = false, y = false;
    Flags options {
        { (regex::ECMAScriptFlags)regex::AllFlags::Global }, // JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
        {},
    };

    // Marks a flag character as encountered. Returns false, after throwing a SyntaxError,
    // if the same character was already encountered earlier in the string.
    auto mark_seen = [&](bool& seen, auto flag_char) {
        if (seen) {
            vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, flag_char);
            return false;
        }
        seen = true;
        return true;
    };

    // Turns on a flag in both the effective and the declared option set.
    auto enable = [&](regex::ECMAScriptFlags flag) {
        options.effective_flags |= flag;
        options.declared_flags |= flag;
    };

    for (auto ch : flags) {
        switch (ch) {
        case 'g':
            if (!mark_seen(g, ch))
                return options;
            enable(regex::ECMAScriptFlags::Global);
            break;
        case 'i':
            if (!mark_seen(i, ch))
                return options;
            enable(regex::ECMAScriptFlags::Insensitive);
            break;
        case 'm':
            if (!mark_seen(m, ch))
                return options;
            enable(regex::ECMAScriptFlags::Multiline);
            break;
        case 's':
            if (!mark_seen(s, ch))
                return options;
            enable(regex::ECMAScriptFlags::SingleLine);
            break;
        case 'u':
            if (!mark_seen(u, ch))
                return options;
            enable(regex::ECMAScriptFlags::Unicode);
            break;
        case 'y':
            if (!mark_seen(y, ch))
                return options;
            // Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
            options.effective_flags.reset_flag(regex::ECMAScriptFlags::Global);
            // "What's the difference between sticky and global, then", that's simple.
            // all the other flags imply 'global', and the "global" flag implies 'stateful';
            // however, the "sticky" flag does *not* imply 'global', only 'stateful'.
            options.effective_flags |= (regex::ECMAScriptFlags)regex::AllFlags::Internal_Stateful;
            enable(regex::ECMAScriptFlags::Sticky);
            break;
        default:
            vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectBadFlag, ch);
            return options;
        }
    }

    return options;
}
|
||||
|
||||
RegExpObject* RegExpObject::create(GlobalObject& global_object, String pattern, String flags)
|
||||
{
|
||||
return global_object.heap().allocate<RegExpObject>(global_object, pattern, flags, *global_object.regexp_prototype());
|
||||
|
@ -42,11 +110,61 @@ RegExpObject::RegExpObject(String pattern, String flags, Object& prototype)
|
|||
: Object(prototype)
|
||||
, m_pattern(pattern)
|
||||
, m_flags(flags)
|
||||
, m_active_flags(options_from(m_flags, this->vm(), this->global_object()))
|
||||
, m_regex(pattern, m_active_flags.effective_flags)
|
||||
{
|
||||
if (m_regex.parser_result.error != regex::Error::NoError) {
|
||||
vm().throw_exception<SyntaxError>(global_object(), ErrorType::RegExpCompileError, m_regex.error_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Performs the base Object initialization and then installs the native 'lastIndex'
// accessor (getter last_index, setter set_last_index) as a writable property.
void RegExpObject::initialize(GlobalObject& global_object)
{
    Object::initialize(global_object);
    define_native_property(vm().names.lastIndex, last_index, set_last_index, Attribute::Writable);
}
|
||||
|
||||
// Nothing to release explicitly; members clean up after themselves.
RegExpObject::~RegExpObject() = default;
|
||||
|
||||
// Resolves the current |this| value to a RegExpObject.
// Returns nullptr when |this| cannot be converted to an object, or, after throwing a
// TypeError, when the resulting object is not a RegExpObject.
static RegExpObject* regexp_object_from(VM& vm, GlobalObject& global_object)
{
    auto* receiver = vm.this_value(global_object).to_object(global_object);
    if (receiver && receiver->is_regexp_object())
        return static_cast<RegExpObject*>(receiver);

    if (receiver)
        vm.throw_exception<TypeError>(global_object, ErrorType::NotA, "RegExp");
    return nullptr;
}
|
||||
|
||||
// Getter for 'lastIndex': reports the regex's current start offset as an unsigned number.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpObject::last_index)
{
    if (auto* self = regexp_object_from(vm, global_object))
        return Value(static_cast<unsigned>(self->regex().start_offset));
    return {};
}
|
||||
|
||||
// Setter for 'lastIndex': converts the assigned value to a 32-bit integer and stores it as
// the regex's start offset. Does nothing when |this| is not a RegExpObject or when the
// conversion raised an exception.
JS_DEFINE_NATIVE_SETTER(RegExpObject::set_last_index)
{
    auto* self = regexp_object_from(vm, global_object);
    if (!self)
        return;

    auto requested = value.to_i32(global_object);
    if (vm.exception())
        return;

    // Negative values are clamped to zero.
    self->regex().start_offset = requested < 0 ? 0 : requested;
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,6 +28,12 @@
|
|||
|
||||
#include <LibJS/AST.h>
|
||||
#include <LibJS/Runtime/Object.h>
|
||||
#include <LibRegex/Regex.h>
|
||||
|
||||
// Pair of option sets derived from a RegExp flag string.
struct Flags {
|
||||
// Options the regex is actually constructed with.
regex::RegexOptions<ECMAScriptFlags> effective_flags;
|
||||
// Options corresponding to the flag characters that were written out explicitly;
// this is what the flag getters on the prototype report.
regex::RegexOptions<ECMAScriptFlags> declared_flags;
|
||||
};
|
||||
|
||||
namespace JS {
|
||||
|
||||
|
@ -38,16 +44,25 @@ public:
|
|||
static RegExpObject* create(GlobalObject&, String pattern, String flags);
|
||||
|
||||
RegExpObject(String pattern, String flags, Object& prototype);
|
||||
virtual void initialize(GlobalObject&) override;
|
||||
virtual ~RegExpObject() override;
|
||||
|
||||
// The pattern string this object was constructed with.
const String& pattern() const { return m_pattern; }
|
||||
// The flag string this object was constructed with.
const String& flags() const { return m_flags; }
|
||||
// The options matching the flag characters that were explicitly given (as opposed to the
// effective options the regex is constructed with). Const-qualified so it can be called
// through a const RegExpObject, like pattern() and flags().
const regex::RegexOptions<ECMAScriptFlags>& declared_options() const { return m_active_flags.declared_flags; }
|
||||
// The compiled regex. Returned by const reference even from a non-const object.
// NOTE(review): callers assign to start_offset through this reference, so that member is
// presumably declared mutable in Regex - confirm.
const Regex<ECMA262>& regex() { return m_regex; }
|
||||
// Const overload; returns the same compiled regex.
const Regex<ECMA262>& regex() const { return m_regex; }
|
||||
|
||||
private:
|
||||
// Type tag override; regexp_object_from() checks this before downcasting to RegExpObject.
virtual bool is_regexp_object() const override { return true; }
|
||||
|
||||
JS_DECLARE_NATIVE_GETTER(last_index);
|
||||
JS_DECLARE_NATIVE_SETTER(set_last_index);
|
||||
|
||||
String m_pattern;
|
||||
String m_flags;
|
||||
Flags m_active_flags;
|
||||
Regex<ECMA262> m_regex;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -43,6 +43,17 @@ void RegExpPrototype::initialize(GlobalObject& global_object)
|
|||
Object::initialize(global_object);
|
||||
u8 attr = Attribute::Writable | Attribute::Configurable;
|
||||
define_native_function(vm.names.toString, to_string, 0, attr);
|
||||
define_native_function(vm.names.test, test, 1, attr);
|
||||
|
||||
u8 readable_attr = Attribute::Configurable;
|
||||
define_native_property(vm.names.dotAll, dot_all, nullptr, readable_attr);
|
||||
define_native_property(vm.names.flags, flags, nullptr, readable_attr);
|
||||
define_native_property(vm.names.global, global, nullptr, readable_attr);
|
||||
define_native_property(vm.names.ignoreCase, ignore_case, nullptr, readable_attr);
|
||||
define_native_property(vm.names.multiline, multiline, nullptr, readable_attr);
|
||||
define_native_property(vm.names.source, source, nullptr, readable_attr);
|
||||
define_native_property(vm.names.sticky, sticky, nullptr, readable_attr);
|
||||
define_native_property(vm.names.unicode, unicode, nullptr, readable_attr);
|
||||
}
|
||||
|
||||
RegExpPrototype::~RegExpPrototype()
|
||||
|
@ -61,6 +72,124 @@ static RegExpObject* regexp_object_from(VM& vm, GlobalObject& global_object)
|
|||
return static_cast<RegExpObject*>(this_object);
|
||||
}
|
||||
|
||||
// Getter for 'dotAll': whether the SingleLine option was declared.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::dot_all)
{
    if (auto* self = regexp_object_from(vm, global_object))
        return Value(self->declared_options().has_flag_set(ECMAScriptFlags::SingleLine));
    return {};
}
|
||||
|
||||
// Getter for 'flags': builds a string from the declared options, one letter per option.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::flags)
{
    auto* self = regexp_object_from(vm, global_object);
    if (!self)
        return {};

    auto declared = self->declared_options();

    // Letters are always emitted in this fixed order.
    const struct {
        ECMAScriptFlags flag;
        char letter;
    } flag_letters[] = {
        { ECMAScriptFlags::Global, 'g' },
        { ECMAScriptFlags::Insensitive, 'i' },
        { ECMAScriptFlags::Multiline, 'm' },
        { ECMAScriptFlags::SingleLine, 's' },
        { ECMAScriptFlags::Unicode, 'u' },
        { ECMAScriptFlags::Sticky, 'y' },
    };

    StringBuilder builder(8);
    for (auto& entry : flag_letters) {
        if (declared.has_flag_set(entry.flag))
            builder.append(entry.letter);
    }

    return js_string(vm, builder.to_string());
}
|
||||
|
||||
// Getter for 'global': whether the Global option was declared.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::global)
{
    if (auto* self = regexp_object_from(vm, global_object)) {
        // Note that this "Global" is actually "Global | Stateful"
        return Value(self->declared_options().has_flag_set(ECMAScriptFlags::Global));
    }
    return {};
}
|
||||
|
||||
// Getter for 'ignoreCase': whether the Insensitive option was declared.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::ignore_case)
{
    if (auto* self = regexp_object_from(vm, global_object))
        return Value(self->declared_options().has_flag_set(ECMAScriptFlags::Insensitive));
    return {};
}
|
||||
|
||||
// Getter for 'multiline': whether the Multiline option was declared.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::multiline)
{
    if (auto* self = regexp_object_from(vm, global_object))
        return Value(self->declared_options().has_flag_set(ECMAScriptFlags::Multiline));
    return {};
}
|
||||
|
||||
// Getter for 'source': the object's pattern string as a JS string.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::source)
{
    if (auto* self = regexp_object_from(vm, global_object))
        return js_string(vm, self->pattern());
    return {};
}
|
||||
|
||||
// Getter for 'sticky': whether the Sticky option was declared.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::sticky)
{
    if (auto* self = regexp_object_from(vm, global_object))
        return Value(self->declared_options().has_flag_set(ECMAScriptFlags::Sticky));
    return {};
}
|
||||
|
||||
// Getter for 'unicode': whether the Unicode option was declared.
// Yields an empty value when |this| is not a RegExpObject.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::unicode)
{
    if (auto* self = regexp_object_from(vm, global_object))
        return Value(self->declared_options().has_flag_set(ECMAScriptFlags::Unicode));
    return {};
}
|
||||
|
||||
// Runs `re` against `subject` and returns the match result.
// When the match fails and the regex has the Global option set, the regex's start offset
// (which backs 'lastIndex') is rewound to zero.
RegexResult RegExpPrototype::do_match(const Regex<ECMA262>& re, const StringView& subject)
{
    auto outcome = re.match(subject);
    if (outcome.success)
        return outcome;

    if (re.options().has_flag_set(ECMAScriptFlags::Global))
        re.start_offset = 0;
    return outcome;
}
|
||||
|
||||
// RegExp.prototype.test: converts the first argument to a string, matches the regex against
// it and returns whether the match succeeded. Yields an empty value when |this| is not a
// RegExpObject or when the string conversion raised an exception.
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::test)
{
    // FIXME: This should try using dynamic properties for 'exec' first,
    //        before falling back to builtin_exec.
    auto* self = regexp_object_from(vm, global_object);
    if (!self)
        return {};

    auto subject = vm.argument(0).to_string(global_object);
    if (vm.exception())
        return {};

    auto& re = self->regex();

    // RegExps without "global" and "sticky" always start at offset 0.
    const auto stateful = (ECMAScriptFlags)regex::AllFlags::Internal_Stateful;
    if (!re.options().has_flag_set(stateful))
        re.start_offset = 0;

    return Value(do_match(re, subject).success);
}
|
||||
|
||||
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::to_string)
|
||||
{
|
||||
auto* regexp_object = regexp_object_from(vm, global_object);
|
||||
|
|
|
@ -39,6 +39,18 @@ public:
|
|||
virtual ~RegExpPrototype() override;
|
||||
|
||||
private:
|
||||
static RegexResult do_match(const Regex<ECMA262>&, const StringView&);
|
||||
|
||||
JS_DECLARE_NATIVE_GETTER(dot_all);
|
||||
JS_DECLARE_NATIVE_GETTER(flags);
|
||||
JS_DECLARE_NATIVE_GETTER(global);
|
||||
JS_DECLARE_NATIVE_GETTER(ignore_case);
|
||||
JS_DECLARE_NATIVE_GETTER(multiline);
|
||||
JS_DECLARE_NATIVE_GETTER(source);
|
||||
JS_DECLARE_NATIVE_GETTER(sticky);
|
||||
JS_DECLARE_NATIVE_GETTER(unicode);
|
||||
|
||||
JS_DECLARE_NATIVE_FUNCTION(test);
|
||||
JS_DECLARE_NATIVE_FUNCTION(to_string);
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue