diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 2e8cce5c30..e77a90356e 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -557,6 +557,8 @@ TEST_CASE(ECMA262_parse)
         { "[\\0]"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
         { "[\\00]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
         { "[\\00]"sv, regex::Error::InvalidPattern, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
+        { "\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
+        { "[\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/]"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
     };
 
     for (auto& test : tests) {
@@ -605,6 +607,8 @@ TEST_CASE(ECMA262_match)
         { "((...)X)+"sv, "fooXbarXbazX"sv, true },
         { "(?:)"sv, ""sv, true },
         { "\\^"sv, "^"sv },
+        { "\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/"sv, "^$\\.*+?()[]{}|/"sv, true, ECMAScriptFlags::Unicode },
+        { "[\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/]{15}"sv, "^$\\.*+?()[]{}|/"sv, true, ECMAScriptFlags::Unicode },
         // ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
         { "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
         { "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp
index a0393a6ac0..df58488a0c 100644
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -20,6 +20,15 @@ static constexpr size_t s_maximum_repetition_count = 1024 * 1024;
 static constexpr auto s_alphabetic_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"sv;
 static constexpr auto s_decimal_characters = "0123456789"sv;
 
+// Returns the characters the parser accepts as an IdentityEscape for the given grammar flags.
+// The unicode flag is consulted first; browser_extended only matters when unicode is false.
+static constexpr StringView identity_escape_characters(bool unicode, bool browser_extended)
+{
+    if (unicode)
+        return "^$\\.*+?()[]{}|/"sv;
+    return browser_extended ? "^$\\.*+?()[|"sv : "^$\\.*+?()[]{}|"sv;
+}
+
 ALWAYS_INLINE bool Parser::set_error(Error error)
 {
     if (m_parser_state.error == Error::NoError) {
@@ -1530,8 +1539,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
     }
 
     // IdentityEscape
-    auto source_characters = m_should_use_browser_extended_grammar ? "^$\\.*+?()[|"sv : "^$\\.*+?()[]{}|"sv;
-    for (auto ch : source_characters) {
+    for (auto ch : identity_escape_characters(unicode, m_should_use_browser_extended_grammar)) {
         if (try_skip({ &ch, 1 })) {
             match_length_minimum += 1;
             stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)ch } });
@@ -1840,6 +1848,12 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector& 
                 }
             }
 
+            // IdentityEscape
+            for (auto ch : identity_escape_characters(unicode, m_should_use_browser_extended_grammar)) {
+                if (try_skip({ &ch, 1 }))
+                    return { CharClassRangeElement { .code_point = (u32)ch, .is_character_class = false } };
+            }
+
             if (unicode) {
                 if (try_skip("-"))
                     return { CharClassRangeElement { .code_point = '-', .is_character_class = false } };