mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 09:07:35 +00:00
LibRegex: Use correct source characters for Unicode identity escapes
This commit is contained in:
parent
6a485f612f
commit
c3e1f1f687
2 changed files with 20 additions and 2 deletions
|
@ -557,6 +557,8 @@ TEST_CASE(ECMA262_parse)
|
||||||
{ "[\\0]"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
{ "[\\0]"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
||||||
{ "[\\00]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
{ "[\\00]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||||
{ "[\\00]"sv, regex::Error::InvalidPattern, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
{ "[\\00]"sv, regex::Error::InvalidPattern, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
|
||||||
|
{ "\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
|
||||||
|
{ "[\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/]"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto& test : tests) {
|
for (auto& test : tests) {
|
||||||
|
@ -605,6 +607,8 @@ TEST_CASE(ECMA262_match)
|
||||||
{ "((...)X)+"sv, "fooXbarXbazX"sv, true },
|
{ "((...)X)+"sv, "fooXbarXbazX"sv, true },
|
||||||
{ "(?:)"sv, ""sv, true },
|
{ "(?:)"sv, ""sv, true },
|
||||||
{ "\\^"sv, "^"sv },
|
{ "\\^"sv, "^"sv },
|
||||||
|
{ "\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/"sv, "^$\\.*+?()[]{}|/"sv, true, ECMAScriptFlags::Unicode },
|
||||||
|
{ "[\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/]{15}"sv, "^$\\.*+?()[]{}|/"sv, true, ECMAScriptFlags::Unicode },
|
||||||
// ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
|
// ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
|
||||||
{ "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
|
{ "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
|
||||||
{ "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
|
{ "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
|
||||||
|
|
|
@ -20,6 +20,15 @@ static constexpr size_t s_maximum_repetition_count = 1024 * 1024;
|
||||||
static constexpr auto s_alphabetic_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"sv;
|
static constexpr auto s_alphabetic_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"sv;
|
||||||
static constexpr auto s_decimal_characters = "0123456789"sv;
|
static constexpr auto s_decimal_characters = "0123456789"sv;
|
||||||
|
|
||||||
|
// Selects the set of characters that the ECMA262 parser accepts as an IdentityEscape.
// `unicode` takes precedence over `browser_extended`; when neither is set, the default set is returned.
// The result is a view over a string literal, one candidate character per element.
static constexpr StringView identity_escape_characters(bool unicode, bool browser_extended)
|
||||||
|
{
|
||||||
|
// Unicode mode: the default set plus '/'.
if (unicode)
|
||||||
|
return "^$\\.*+?()[]{}|/"sv;
|
||||||
|
// Browser-extended (non-Unicode) mode: the default set without ']', '{' and '}'.
if (browser_extended)
|
||||||
|
return "^$\\.*+?()[|"sv;
|
||||||
|
// Default set, used when neither flag is set.
return "^$\\.*+?()[]{}|"sv;
|
||||||
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE bool Parser::set_error(Error error)
|
ALWAYS_INLINE bool Parser::set_error(Error error)
|
||||||
{
|
{
|
||||||
if (m_parser_state.error == Error::NoError) {
|
if (m_parser_state.error == Error::NoError) {
|
||||||
|
@ -1530,8 +1539,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
||||||
}
|
}
|
||||||
|
|
||||||
// IdentityEscape
|
// IdentityEscape
|
||||||
auto source_characters = m_should_use_browser_extended_grammar ? "^$\\.*+?()[|"sv : "^$\\.*+?()[]{}|"sv;
|
for (auto ch : identity_escape_characters(unicode, m_should_use_browser_extended_grammar)) {
|
||||||
for (auto ch : source_characters) {
|
|
||||||
if (try_skip({ &ch, 1 })) {
|
if (try_skip({ &ch, 1 })) {
|
||||||
match_length_minimum += 1;
|
match_length_minimum += 1;
|
||||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)ch } });
|
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)ch } });
|
||||||
|
@ -1840,6 +1848,12 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IdentityEscape
|
||||||
|
for (auto ch : identity_escape_characters(unicode, m_should_use_browser_extended_grammar)) {
|
||||||
|
if (try_skip({ &ch, 1 }))
|
||||||
|
return { CharClassRangeElement { .code_point = (u32)ch, .is_character_class = false } };
|
||||||
|
}
|
||||||
|
|
||||||
if (unicode) {
|
if (unicode) {
|
||||||
if (try_skip("-"))
|
if (try_skip("-"))
|
||||||
return { CharClassRangeElement { .code_point = '-', .is_character_class = false } };
|
return { CharClassRangeElement { .code_point = '-', .is_character_class = false } };
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue