mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 03:17:35 +00:00
LibRegex: Implement legacy octal escape parsing closer to the spec
The grammar for the ECMA-262 CharacterEscape is:

    CharacterEscape[U, N] ::
        ControlEscape
        c ControlLetter
        0 [lookahead ∉ DecimalDigit]
        HexEscapeSequence
        RegExpUnicodeEscapeSequence[?U]
        [~U]LegacyOctalEscapeSequence
        IdentityEscape[?U, ?N]

It's important to parse the standalone "\0 [lookahead ∉ DecimalDigit]" before parsing LegacyOctalEscapeSequence. Otherwise, all standalone "\0" patterns are parsed as octal escapes, which are disallowed in Unicode mode. Further, LegacyOctalEscapeSequence should also be parsed while parsing character classes.
This commit is contained in:
parent
83ca8c7e38
commit
6a485f612f
2 changed files with 43 additions and 9 deletions
|
@ -22,6 +22,12 @@ static PosixOptions match_test_api_options(const PosixOptions options)
|
|||
return options;
|
||||
}
|
||||
|
||||
template<typename... Flags>
|
||||
static constexpr ECMAScriptFlags combine_flags(Flags&&... flags) requires((IsSame<Flags, ECMAScriptFlags> && ...))
|
||||
{
|
||||
return static_cast<ECMAScriptFlags>((static_cast<regex::FlagsUnderlyingType>(flags) | ...));
|
||||
}
|
||||
|
||||
TEST_CASE(regex_options_ecmascript)
|
||||
{
|
||||
ECMAScriptOptions eo;
|
||||
|
@ -543,6 +549,14 @@ TEST_CASE(ECMA262_parse)
|
|||
{ "\\A"sv, regex::Error::InvalidCharacterClass, ECMAScriptFlags::Unicode },
|
||||
{ "[\\A]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\A]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
{ "\\0"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\0"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
||||
{ "\\00"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\00"sv, regex::Error::InvalidCharacterClass, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
||||
{ "[\\0]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\0]"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
||||
{ "[\\00]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\00]"sv, regex::Error::InvalidPattern, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
||||
};
|
||||
|
||||
for (auto& test : tests) {
|
||||
|
@ -606,6 +620,12 @@ TEST_CASE(ECMA262_match)
|
|||
"return /xx/"sv, true, ECMAScriptFlags::BrowserExtended
|
||||
}, // #5517, appears to be matching JS expressions that involve regular expressions...
|
||||
{ "a{2,}"sv, "aaaa"sv }, // #5518
|
||||
{ "\\0"sv, "\0"sv, true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\0"sv, "\0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
||||
{ "\\01"sv, "\1"sv, true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\0]"sv, "\0"sv, true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\0]"sv, "\0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
||||
{ "[\\01]"sv, "\1"sv, true, ECMAScriptFlags::BrowserExtended },
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue