1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-23 19:27:43 +00:00

LibRegex: Disallow invalid interval quantifiers in Unicode mode

Fixes all remaining 'built-ins/RegExp/property-escapes' test262 tests.
This commit is contained in:
Timothy Flynn 2021-08-10 16:35:45 -04:00 committed by Andreas Kling
parent a98d3a1a85
commit df14d11a11
3 changed files with 63 additions and 45 deletions

View file

@ -522,6 +522,9 @@ TEST_CASE(ECMA262_parse)
{ "\\p{hello friends}", regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode }, { "\\p{hello friends}", regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
{ "\\p{Prepended_Concatenation_Mark}", regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode }, { "\\p{Prepended_Concatenation_Mark}", regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
{ "\\p{ASCII}", regex::Error::NoError, ECMAScriptFlags::Unicode }, { "\\p{ASCII}", regex::Error::NoError, ECMAScriptFlags::Unicode },
{ "\\\\p{1}", regex::Error::NoError, ECMAScriptFlags::Unicode },
{ "\\\\p{AsCiI}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
{ "\\\\p{ASCII}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
}; };
for (auto& test : tests) { for (auto& test : tests) {

View file

@ -1121,7 +1121,7 @@ Optional<unsigned> ECMA262Parser::read_digits(ECMA262Parser::ReadDigitsInitialZe
return str.to_uint(); return str.to_uint();
} }
bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minimum, bool, bool) bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool)
{ {
enum class Repetition { enum class Repetition {
OneOrMore, OneOrMore,
@ -1144,52 +1144,13 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
consume(); consume();
repetition_mark = Repetition::Optional; repetition_mark = Repetition::Optional;
} else if (match(TokenType::LeftCurly)) { } else if (match(TokenType::LeftCurly)) {
consume();
auto chars_consumed = 1;
repetition_mark = Repetition::Explicit; repetition_mark = Repetition::Explicit;
if (!parse_interval_quantifier(repeat_min, repeat_max)) {
auto low_bound_string = read_digits_as_string(); if (unicode) {
chars_consumed += low_bound_string.length(); // Invalid interval quantifiers are disallowed in Unicode mode - they must be escaped with '\{'.
set_error(Error::InvalidPattern);
auto low_bound = low_bound_string.to_uint();
if (!low_bound.has_value()) {
if (!m_should_use_browser_extended_grammar && done())
return set_error(Error::MismatchingBrace);
back(chars_consumed + !done());
return true;
}
repeat_min = low_bound.value();
if (match(TokenType::Comma)) {
consume();
++chars_consumed;
auto high_bound_string = read_digits_as_string();
auto high_bound = high_bound_string.to_uint();
if (high_bound.has_value()) {
repeat_max = high_bound.value();
chars_consumed += high_bound_string.length();
} }
} else { return !has_error();
repeat_max = repeat_min;
}
if (!match(TokenType::RightCurly)) {
if (!m_should_use_browser_extended_grammar && done())
return set_error(Error::MismatchingBrace);
back(chars_consumed + !done());
return true;
}
consume();
++chars_consumed;
if (repeat_max.has_value()) {
if (repeat_min.value() > repeat_max.value())
set_error(Error::InvalidBraceContent);
} }
} else { } else {
return true; return true;
@ -1223,6 +1184,59 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
return true; return true;
} }
// Parses an interval quantifier of the form `{min}`, `{min,}` or `{min,max}`.
//
// Precondition: the current token is TokenType::LeftCurly (enforced by VERIFY).
//
// On success the whole `{...}` sequence has been consumed, `repeat_min` holds the
// lower bound and `repeat_max` holds the upper bound (equal to `repeat_min` for
// `{min}`; left untouched when no usable upper bound was given). If min > max,
// Error::InvalidBraceContent is recorded but the function still returns true.
//
// When the input does not form an interval quantifier, everything consumed here
// is handed back via back() and false is returned, so the caller can decide how
// to treat the brace. The exception is running out of input while
// m_should_use_browser_extended_grammar is off: then the result of
// set_error(Error::MismatchingBrace) is returned and nothing is rewound.
//
// NOTE(review): a non-empty upper bound that fails to convert (e.g. overflow)
// followed by `}` is accepted with `repeat_max` left untouched - confirm that
// callers are happy to treat this like `{min,}`.
bool ECMA262Parser::parse_interval_quantifier(Optional<size_t>& repeat_min, Optional<size_t>& repeat_max)
{
    VERIFY(match(TokenType::LeftCurly));
    consume();
    auto chars_consumed = 1;

    // Shared failure path: either report an unterminated brace or undo everything
    // consumed so far.
    // NOTE(review): the extra `!done()` step is kept exactly as in the previous
    // implementation; presumably it compensates for lexer lookahead - confirm.
    auto reject_interval = [&]() -> bool {
        if (!m_should_use_browser_extended_grammar && done())
            return set_error(Error::MismatchingBrace);

        back(chars_consumed + !done());
        return false;
    };

    auto low_bound_string = read_digits_as_string();
    chars_consumed += low_bound_string.length();

    auto low_bound = low_bound_string.to_uint();
    if (!low_bound.has_value())
        return reject_interval();

    repeat_min = low_bound.value();

    if (match(TokenType::Comma)) {
        consume();
        ++chars_consumed;

        auto high_bound_string = read_digits_as_string();
        // Count the digits unconditionally: they have been consumed even when the
        // conversion below fails, and a later rewind must hand all of them back.
        chars_consumed += high_bound_string.length();

        auto high_bound = high_bound_string.to_uint();
        if (high_bound.has_value())
            repeat_max = high_bound.value();
    } else {
        repeat_max = repeat_min;
    }

    if (!match(TokenType::RightCurly))
        return reject_interval();

    consume();

    if (repeat_max.has_value() && repeat_min.value() > repeat_max.value())
        set_error(Error::InvalidBraceContent);

    return true;
}
bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named) bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
{ {
if (match(TokenType::EscapeSequence)) { if (match(TokenType::EscapeSequence)) {

View file

@ -228,6 +228,7 @@ private:
bool parse_assertion(ByteCode&, size_t&, bool unicode, bool named); bool parse_assertion(ByteCode&, size_t&, bool unicode, bool named);
bool parse_atom(ByteCode&, size_t&, bool unicode, bool named); bool parse_atom(ByteCode&, size_t&, bool unicode, bool named);
bool parse_quantifier(ByteCode&, size_t&, bool unicode, bool named); bool parse_quantifier(ByteCode&, size_t&, bool unicode, bool named);
bool parse_interval_quantifier(Optional<size_t>& repeat_min, Optional<size_t>& repeat_max);
bool parse_atom_escape(ByteCode&, size_t&, bool unicode, bool named); bool parse_atom_escape(ByteCode&, size_t&, bool unicode, bool named);
bool parse_character_class(ByteCode&, size_t&, bool unicode, bool named); bool parse_character_class(ByteCode&, size_t&, bool unicode, bool named);
bool parse_capture_group(ByteCode&, size_t&, bool unicode, bool named); bool parse_capture_group(ByteCode&, size_t&, bool unicode, bool named);