1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 00:27:45 +00:00

LibRegex: Clear previous capture group contents in ECMA262 mode

ECMA262 requires that the capture groups of a repeated group only contain
the values from the last iteration, e.g. `((c)(a)?(b))+` should _not_
contain 'a' in the second capture group when matching "cabcb".
This commit is contained in:
Ali Mohammad Pur 2021-07-23 19:37:18 +04:30 committed by Ali Mohammad Pur
parent 34ec0fa8ad
commit c8b2199251
4 changed files with 109 additions and 1 deletions

View file

@ -190,12 +190,14 @@ public:
// Constructs an ECMA262 parser that reads from `lexer`; the lexer is forwarded to the Parser base.
explicit ECMA262Parser(Lexer& lexer)
: Parser(lexer)
{
// Start with one default-constructed entry in the capture-group scope list
// (presumably the top-level scope of the pattern -- confirm against the parsing code).
m_capture_groups_in_scope.empend();
}
// Constructs an ECMA262 parser that reads from `lexer` with optional regex options.
// When `regex_options` is empty, a default-constructed options value is handed to the Parser base.
ECMA262Parser(Lexer& lexer, Optional<typename ParserTraits<ECMA262Parser>::OptionsType> regex_options)
    : Parser(lexer, regex_options.value_or({}))
{
    // Start with one default-constructed entry in the capture-group scope list.
    m_capture_groups_in_scope.empend();

    // The browser-extended grammar is only enabled when options were supplied
    // and they carry the BrowserExtended flag.
    const bool browser_extended_requested = regex_options.has_value()
        && regex_options->has_flag_set(ECMAScriptFlags::BrowserExtended);
    m_should_use_browser_extended_grammar = browser_extended_requested;
}
// Compiler-generated destructor.
~ECMA262Parser() = default;
@ -242,6 +244,12 @@ private:
// Keep the Annex B. behaviour behind a flag; users can enable it by passing the `ECMAScriptFlags::BrowserExtended` flag.
// Defaults to false, i.e. the extended grammar is off unless explicitly requested.
bool m_should_use_browser_extended_grammar { false };
// ECMA-262 requires that a repeated group such as (...)+ only contains the matches of its last iteration,
// which in practice means the inner captures of a capture group must be cleared before trying to match it again.
// To do that, we have to keep track of which capture groups are "in scope", so we can clear them as needed.
// A capture group is identified either by a size_t or by a String
// (presumably the group's index or its name, respectively -- confirm against the users of this type).
using CaptureGroup = Variant<size_t, String>;
// List of capture-group lists; both constructors push one initial (empty) entry.
// NOTE(review): presumably one inner list per nesting level currently being parsed -- confirm.
Vector<Vector<CaptureGroup>> m_capture_groups_in_scope;
};
// Alias that lets PosixExtendedParser be referred to as PosixExtended.
using PosixExtended = PosixExtendedParser;