mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 08:27:45 +00:00
LibRegex: Make parse_disjunction() consume all disjunctions in one frame
This helps us not blow up when too many disjunctions are chained together in the regex we're parsing. Fixes #12615.
This commit is contained in:
parent
627bbee055
commit
4be7239626
2 changed files with 33 additions and 21 deletions
|
@ -498,6 +498,8 @@ TEST_CASE(posix_extended_nested_capture_group)
|
||||||
EXPECT_EQ(result.capture_group_matches[0][2].view, "llo"sv);
|
EXPECT_EQ(result.capture_group_matches[0][2].view, "llo"sv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto parse_test_case_long_disjunction_chain = String::repeated("a|"sv, 10000);
|
||||||
|
|
||||||
TEST_CASE(ECMA262_parse)
|
TEST_CASE(ECMA262_parse)
|
||||||
{
|
{
|
||||||
struct _test {
|
struct _test {
|
||||||
|
@ -506,7 +508,7 @@ TEST_CASE(ECMA262_parse)
|
||||||
regex::ECMAScriptFlags flags {};
|
regex::ECMAScriptFlags flags {};
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr _test tests[] {
|
_test const tests[] {
|
||||||
{ "^hello.$"sv },
|
{ "^hello.$"sv },
|
||||||
{ "^(hello.)$"sv },
|
{ "^(hello.)$"sv },
|
||||||
{ "^h{0,1}ello.$"sv },
|
{ "^h{0,1}ello.$"sv },
|
||||||
|
@ -599,7 +601,8 @@ TEST_CASE(ECMA262_parse)
|
||||||
{ "(?<$$_$$>a)"sv },
|
{ "(?<$$_$$>a)"sv },
|
||||||
{ "(?<ÿ>a)"sv },
|
{ "(?<ÿ>a)"sv },
|
||||||
{ "(?<𝓑𝓻𝓸𝔀𝓷>a)"sv },
|
{ "(?<𝓑𝓻𝓸𝔀𝓷>a)"sv },
|
||||||
{ "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended } // #12373, quantifiable assertions.
|
{ "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // #12373, quantifiable assertions.
|
||||||
|
{ parse_test_case_long_disjunction_chain.view() }, // A whole lot of disjunctions, should not overflow the stack.
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto& test : tests) {
|
for (auto& test : tests) {
|
||||||
|
|
|
@ -956,28 +956,37 @@ bool ECMA262Parser::parse_pattern(ByteCode& stack, size_t& match_length_minimum,
|
||||||
|
|
||||||
bool ECMA262Parser::parse_disjunction(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
|
bool ECMA262Parser::parse_disjunction(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
|
||||||
{
|
{
|
||||||
ByteCode left_alternative_stack;
|
size_t total_match_length_minimum = NumericLimits<size_t>::max();
|
||||||
size_t left_alternative_min_length = 0;
|
Vector<ByteCode> alternatives;
|
||||||
auto alt_ok = parse_alternative(left_alternative_stack, left_alternative_min_length, unicode, named);
|
do {
|
||||||
if (!alt_ok)
|
ByteCode alternative_stack;
|
||||||
return false;
|
size_t alternative_minimum_length = 0;
|
||||||
|
auto alt_ok = parse_alternative(alternative_stack, alternative_minimum_length, unicode, named);
|
||||||
|
if (!alt_ok)
|
||||||
|
return false;
|
||||||
|
|
||||||
if (!match(TokenType::Pipe)) {
|
alternatives.append(move(alternative_stack));
|
||||||
stack.extend(left_alternative_stack);
|
total_match_length_minimum = min(alternative_minimum_length, total_match_length_minimum);
|
||||||
match_length_minimum = left_alternative_min_length;
|
|
||||||
return alt_ok;
|
if (!match(TokenType::Pipe))
|
||||||
|
break;
|
||||||
|
consume();
|
||||||
|
} while (true);
|
||||||
|
|
||||||
|
Optional<ByteCode> alternative_stack {};
|
||||||
|
for (auto& alternative : alternatives) {
|
||||||
|
if (alternative_stack.has_value()) {
|
||||||
|
ByteCode target_stack;
|
||||||
|
target_stack.insert_bytecode_alternation(alternative_stack.release_value(), move(alternative));
|
||||||
|
alternative_stack = move(target_stack);
|
||||||
|
} else {
|
||||||
|
alternative_stack = move(alternative);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
consume();
|
stack.extend(alternative_stack.release_value());
|
||||||
ByteCode right_alternative_stack;
|
match_length_minimum = total_match_length_minimum;
|
||||||
size_t right_alternative_min_length = 0;
|
return true;
|
||||||
auto continuation_ok = parse_disjunction(right_alternative_stack, right_alternative_min_length, unicode, named);
|
|
||||||
if (!continuation_ok)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
stack.insert_bytecode_alternation(move(left_alternative_stack), move(right_alternative_stack));
|
|
||||||
match_length_minimum = min(left_alternative_min_length, right_alternative_min_length);
|
|
||||||
return continuation_ok;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ECMA262Parser::parse_alternative(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
|
bool ECMA262Parser::parse_alternative(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue