1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 11:17:44 +00:00

LibRegex: Allow references to capture groups that aren't parsed yet

This only applies to the ECMA262 parser.
This behaviour is an ECMA262-specific quirk: such references always
generate zero-length matches (even on subsequent passes).
Also adds a test in LibJS's test suite.

Fixes #6039.
This commit is contained in:
AnotherTest 2021-04-01 18:30:47 +04:30 committed by Andreas Kling
parent 804ab79995
commit 6bbb26fdaf
6 changed files with 80 additions and 6 deletions

View file

@ -210,6 +210,14 @@ private:
bool parse_legacy_octal_escape_sequence(ByteCode& bytecode_stack, size_t& length);
Optional<u8> parse_legacy_octal_escape();
size_t ensure_total_number_of_capturing_parenthesis();
// ECMA-262's flavour of regex is a bit weird in that it allows backrefs to reference "future" captures, and such backrefs
// always match the empty string. So we have to know how many capturing parentheses there are, but we don't want to always
// parse the pattern twice, so we'll only do so when it's actually needed.
// Most patterns should have no need to ever populate this field.
Optional<size_t> m_total_number_of_capturing_parenthesis;
// Keep the Annex B behaviour behind a flag; users can enable it by passing the `ECMAScriptFlags::BrowserExtended` flag.
bool m_should_use_browser_extended_grammar { false };
};