mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 05:17:35 +00:00
LibRegex: Allow references to capture groups that aren't parsed yet
This only applies to the ECMA262 parser. This behaviour is an ECMA262-specific quirk, such references always generate zero-length matches (even on subsequent passes). Also adds a test in LibJS's test suite. Fixes #6039.
This commit is contained in:
parent
804ab79995
commit
6bbb26fdaf
6 changed files with 80 additions and 6 deletions
|
@ -1114,12 +1114,19 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
|||
{
|
||||
if (auto escape_str = read_digits_as_string(ReadDigitsInitialZeroState::Disallow, ReadDigitFollowPolicy::DisallowNonDigit); !escape_str.is_empty()) {
|
||||
if (auto escape = escape_str.to_uint(); escape.has_value()) {
|
||||
// See if this is a "back"-reference (we've already parsed the group it refers to)
|
||||
auto maybe_length = m_parser_state.capture_group_minimum_lengths.get(escape.value());
|
||||
if (maybe_length.has_value()) {
|
||||
match_length_minimum += maybe_length.value();
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)escape.value() } });
|
||||
return true;
|
||||
}
|
||||
// It's not a pattern seen before, so we have to see if it's a valid reference to a future group.
|
||||
if (escape.value() <= ensure_total_number_of_capturing_parenthesis()) {
|
||||
// This refers to a future group, and it will _always_ be matching an empty string
|
||||
// So just match nothing and move on.
|
||||
return true;
|
||||
}
|
||||
if (!m_should_use_browser_extended_grammar) {
|
||||
set_error(Error::InvalidNumber);
|
||||
return false;
|
||||
|
@ -1729,4 +1736,47 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t ECMA262Parser::ensure_total_number_of_capturing_parenthesis()
|
||||
{
|
||||
if (m_total_number_of_capturing_parenthesis.has_value())
|
||||
return m_total_number_of_capturing_parenthesis.value();
|
||||
|
||||
GenericLexer lexer { m_parser_state.lexer.source() };
|
||||
size_t count = 0;
|
||||
while (!lexer.is_eof()) {
|
||||
switch (lexer.peek()) {
|
||||
case '\\':
|
||||
lexer.consume(2);
|
||||
continue;
|
||||
case '[':
|
||||
while (!lexer.is_eof()) {
|
||||
if (lexer.consume_specific('\\'))
|
||||
lexer.consume();
|
||||
else if (lexer.consume_specific(']'))
|
||||
break;
|
||||
lexer.consume();
|
||||
}
|
||||
break;
|
||||
case '(':
|
||||
if (lexer.consume_specific('?')) {
|
||||
// non-capturing group '(?:', lookaround '(?<='/'(?<!', or named capture '(?<'
|
||||
if (!lexer.consume_specific('<'))
|
||||
break;
|
||||
|
||||
if (lexer.next_is(is_any_of("=!")))
|
||||
break;
|
||||
|
||||
++count;
|
||||
} else {
|
||||
++count;
|
||||
}
|
||||
break;
|
||||
}
|
||||
lexer.consume();
|
||||
}
|
||||
|
||||
m_total_number_of_capturing_parenthesis = count;
|
||||
return count;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue