mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 15:37:46 +00:00
LibJS: Parse RegExp literals at AST creation time, not execution time
The spec requires that invalid RegExp literals must cause a Syntax Error before the JavaScript is executed. See: https://tc39.es/ecma262/#sec-patterns-static-semantics-early-errors This is explicitly tested in the RegExp/property-escapes test262 tests. For example, see unsupported-property-Line_Break.js: $DONOTEVALUATE(); /\p{Line_Break}/u; That RegExp literal is invalid because Line_Break is not a supported Unicode property. $DONOTEVALUATE() just throws an exception when it is executed. The test expects that this file will fail to be parsed. Note that RegExp patterns can still be parsed at execution time by way of "new RegExp(...)".
This commit is contained in:
parent
1400e3cf58
commit
f1dd770a8a
6 changed files with 124 additions and 94 deletions
|
@ -14,97 +14,108 @@
|
|||
|
||||
namespace JS {
|
||||
|
||||
static Flags options_from(GlobalObject& global_object, const String& flags)
|
||||
Result<regex::RegexOptions<ECMAScriptFlags>, String> regex_flags_from_string(StringView flags)
|
||||
{
|
||||
auto& vm = global_object.vm();
|
||||
bool d = false, g = false, i = false, m = false, s = false, u = false, y = false;
|
||||
Flags options {
|
||||
// JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
|
||||
// FIXME: Enable 'BrowserExtended' only if in a browser context.
|
||||
.effective_flags = { (regex::ECMAScriptFlags)regex::AllFlags::Global | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches | regex::ECMAScriptFlags::BrowserExtended },
|
||||
.declared_flags = {},
|
||||
};
|
||||
auto options = RegExpObject::default_flags;
|
||||
|
||||
for (auto ch : flags) {
|
||||
switch (ch) {
|
||||
case 'd':
|
||||
if (d)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
|
||||
d = true;
|
||||
break;
|
||||
case 'g':
|
||||
if (g)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
|
||||
g = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Global;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Global;
|
||||
options |= regex::ECMAScriptFlags::Global;
|
||||
break;
|
||||
case 'i':
|
||||
if (i)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
|
||||
i = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Insensitive;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Insensitive;
|
||||
options |= regex::ECMAScriptFlags::Insensitive;
|
||||
break;
|
||||
case 'm':
|
||||
if (m)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
|
||||
m = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Multiline;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Multiline;
|
||||
options |= regex::ECMAScriptFlags::Multiline;
|
||||
break;
|
||||
case 's':
|
||||
if (s)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
|
||||
s = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::SingleLine;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::SingleLine;
|
||||
options |= regex::ECMAScriptFlags::SingleLine;
|
||||
break;
|
||||
case 'u':
|
||||
if (u)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
|
||||
u = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Unicode;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Unicode;
|
||||
options |= regex::ECMAScriptFlags::Unicode;
|
||||
break;
|
||||
case 'y':
|
||||
if (y)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
|
||||
y = true;
|
||||
// Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
|
||||
options.effective_flags.reset_flag(regex::ECMAScriptFlags::Global);
|
||||
options.reset_flag(regex::ECMAScriptFlags::Global);
|
||||
// "What's the difference between sticky and global, then", that's simple.
|
||||
// all the other flags imply 'global', and the "global" flag implies 'stateful';
|
||||
// however, the "sticky" flag does *not* imply 'global', only 'stateful'.
|
||||
options.effective_flags |= (regex::ECMAScriptFlags)regex::AllFlags::Internal_Stateful;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Sticky;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Sticky;
|
||||
options |= (regex::ECMAScriptFlags)regex::AllFlags::Internal_Stateful;
|
||||
options |= regex::ECMAScriptFlags::Sticky;
|
||||
break;
|
||||
default:
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectBadFlag, ch);
|
||||
return options;
|
||||
return String::formatted(ErrorType::RegExpObjectBadFlag.message(), ch);
|
||||
}
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
RegExpObject* RegExpObject::create(GlobalObject& global_object, String original_pattern, String parsed_pattern, String flags)
|
||||
String parse_regex_pattern(StringView pattern, bool unicode)
|
||||
{
|
||||
return global_object.heap().allocate<RegExpObject>(global_object, move(original_pattern), move(parsed_pattern), move(flags), *global_object.regexp_prototype());
|
||||
auto utf16_pattern = AK::utf8_to_utf16(pattern);
|
||||
Utf16View utf16_pattern_view { utf16_pattern };
|
||||
StringBuilder builder;
|
||||
|
||||
// If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
|
||||
// code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
|
||||
for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) {
|
||||
if (unicode) {
|
||||
auto code_point = code_point_at(utf16_pattern_view, i);
|
||||
builder.append_code_point(code_point.code_point);
|
||||
i += code_point.code_unit_count;
|
||||
continue;
|
||||
}
|
||||
|
||||
u16 code_unit = utf16_pattern_view.code_unit_at(i);
|
||||
++i;
|
||||
|
||||
if (code_unit > 0x7f)
|
||||
builder.appendff("\\u{:04x}", code_unit);
|
||||
else
|
||||
builder.append_code_point(code_unit);
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
RegExpObject::RegExpObject(String original_pattern, String parsed_pattern, String flags, Object& prototype)
|
||||
: Object(prototype)
|
||||
, m_original_pattern(move(original_pattern))
|
||||
, m_parsed_pattern(move(parsed_pattern))
|
||||
, m_flags(move(flags))
|
||||
, m_active_flags(options_from(global_object(), m_flags))
|
||||
, m_regex(m_parsed_pattern, m_active_flags.effective_flags)
|
||||
RegExpObject* RegExpObject::create(GlobalObject& global_object, Regex<ECMA262> regex, String pattern, String flags)
|
||||
{
|
||||
if (m_regex.parser_result.error != regex::Error::NoError) {
|
||||
vm().throw_exception<SyntaxError>(global_object(), ErrorType::RegExpCompileError, m_regex.error_string());
|
||||
}
|
||||
return global_object.heap().allocate<RegExpObject>(global_object, move(regex), move(pattern), move(flags), *global_object.regexp_prototype());
|
||||
}
|
||||
|
||||
RegExpObject::RegExpObject(Regex<ECMA262> regex, String pattern, String flags, Object& prototype)
|
||||
: Object(prototype)
|
||||
, m_pattern(move(pattern))
|
||||
, m_flags(move(flags))
|
||||
, m_regex(move(regex))
|
||||
{
|
||||
VERIFY(m_regex.parser_result.error == regex::Error::NoError);
|
||||
}
|
||||
|
||||
RegExpObject::~RegExpObject()
|
||||
|
@ -115,7 +126,7 @@ void RegExpObject::initialize(GlobalObject& global_object)
|
|||
{
|
||||
auto& vm = this->vm();
|
||||
Object::initialize(global_object);
|
||||
define_direct_property(vm.names.lastIndex, {}, Attribute::Writable);
|
||||
define_direct_property(vm.names.lastIndex, Value(0), Attribute::Writable);
|
||||
}
|
||||
|
||||
// 22.2.3.2.4 RegExpCreate ( P, F ), https://tc39.es/ecma262/#sec-regexpcreate
|
||||
|
@ -139,38 +150,27 @@ RegExpObject* regexp_create(GlobalObject& global_object, Value pattern, Value fl
|
|||
original_pattern = String::empty();
|
||||
parsed_pattern = String::empty();
|
||||
} else {
|
||||
auto utf16_pattern = pattern.to_utf16_string(global_object);
|
||||
original_pattern = pattern.to_string(global_object);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
|
||||
Utf16View utf16_pattern_view { utf16_pattern };
|
||||
bool unicode = f.find('u').has_value();
|
||||
StringBuilder builder;
|
||||
|
||||
// If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
|
||||
// code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
|
||||
for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) {
|
||||
if (unicode) {
|
||||
auto code_point = code_point_at(utf16_pattern_view, i);
|
||||
builder.append_code_point(code_point.code_point);
|
||||
i += code_point.code_unit_count;
|
||||
continue;
|
||||
}
|
||||
|
||||
u16 code_unit = utf16_pattern_view.code_unit_at(i);
|
||||
++i;
|
||||
|
||||
if (code_unit > 0x7f)
|
||||
builder.appendff("\\u{:04x}", code_unit);
|
||||
else
|
||||
builder.append_code_point(code_unit);
|
||||
}
|
||||
|
||||
original_pattern = utf16_pattern_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
|
||||
parsed_pattern = builder.build();
|
||||
parsed_pattern = parse_regex_pattern(original_pattern, unicode);
|
||||
}
|
||||
|
||||
auto* object = RegExpObject::create(global_object, move(original_pattern), move(parsed_pattern), move(f));
|
||||
auto parsed_flags_or_error = regex_flags_from_string(f);
|
||||
if (parsed_flags_or_error.is_error()) {
|
||||
vm.throw_exception(global_object, SyntaxError::create(global_object, parsed_flags_or_error.release_error()));
|
||||
return {};
|
||||
}
|
||||
|
||||
Regex<ECMA262> regex(move(parsed_pattern), parsed_flags_or_error.release_value());
|
||||
if (regex.parser_result.error != regex::Error::NoError) {
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpCompileError, regex.error_string());
|
||||
return {};
|
||||
}
|
||||
|
||||
auto* object = RegExpObject::create(global_object, move(regex), move(original_pattern), move(f));
|
||||
object->set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue