mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 00:27:45 +00:00
LibRegex: Clear previous capture group contents in ECMA262 mode
ECMA262 requires that capture groups contain only the values from the last iteration of a quantified group, e.g. `((c)(a)?(b))+` should _not_ contain 'a' in the `(a)` capture group when matching "cabcb", because the last iteration ("cb") does not match `(a)`.
This commit is contained in:
parent
34ec0fa8ad
commit
c8b2199251
4 changed files with 109 additions and 1 deletions
|
@ -190,12 +190,14 @@ public:
|
|||
// Constructs an ECMA262 parser over `lexer`, forwarding it to the Parser base.
explicit ECMA262Parser(Lexer& lexer)
|
||||
: Parser(lexer)
|
||||
{
|
||||
// Start with one entry in m_capture_groups_in_scope.
// NOTE(review): presumably this is the outermost (top-level) scope — confirm against the parser implementation.
m_capture_groups_in_scope.empend();
|
||||
}
|
||||
|
||||
// Constructs an ECMA262 parser over `lexer` with optional regex options.
// If `regex_options` is empty, the Parser base receives default-constructed options.
ECMA262Parser(Lexer& lexer, Optional<typename ParserTraits<ECMA262Parser>::OptionsType> regex_options)
|
||||
: Parser(lexer, regex_options.value_or({}))
|
||||
{
|
||||
// The browser-extended grammar is enabled only when options were supplied and they have BrowserExtended set.
m_should_use_browser_extended_grammar = regex_options.has_value() && regex_options->has_flag_set(ECMAScriptFlags::BrowserExtended);
|
||||
// Start with one entry in m_capture_groups_in_scope, same as the single-argument constructor.
m_capture_groups_in_scope.empend();
|
||||
}
|
||||
|
||||
~ECMA262Parser() = default;
|
||||
|
@ -242,6 +244,12 @@ private:
|
|||
|
||||
// Keep the Annex B behaviour behind a flag; users can enable it by passing the `ECMAScriptFlags::BrowserExtended` flag.
|
||||
bool m_should_use_browser_extended_grammar { false };
|
||||
|
||||
// ECMA-262 basically requires that we clear the inner captures of a capture group before trying to match it,
|
||||
// by requiring that (...)+ only contain the matches for the last iteration.
|
||||
// To do that, we have to keep track of which capture groups are "in scope", so we can clear them as needed.
|
||||
using CaptureGroup = Variant<size_t, String>;
|
||||
Vector<Vector<CaptureGroup>> m_capture_groups_in_scope;
|
||||
};
|
||||
|
||||
using PosixExtended = PosixExtendedParser;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue