mirror of
https://github.com/RGBCube/serenity
synced 2025-07-24 20:07:34 +00:00
LibRegex: Fix parsing identity escape sequences
Also fixes the propagation of default options (the previous implementation reset them to zero before parsing). Partially addresses #4189.
This commit is contained in:
parent
e83e7a03c2
commit
801750b95a
1 changed file with 24 additions and 7 deletions
|
@ -115,7 +115,6 @@ ALWAYS_INLINE void Parser::reset()
|
||||||
m_parser_state.current_token = m_parser_state.lexer.next();
|
m_parser_state.current_token = m_parser_state.lexer.next();
|
||||||
m_parser_state.error = Error::NoError;
|
m_parser_state.error = Error::NoError;
|
||||||
m_parser_state.error_token = { TokenType::Eof, 0, StringView(nullptr) };
|
m_parser_state.error_token = { TokenType::Eof, 0, StringView(nullptr) };
|
||||||
m_parser_state.regex_options = {};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Parser::Result Parser::parse(Optional<AllOptions> regex_options)
|
Parser::Result Parser::parse(Optional<AllOptions> regex_options)
|
||||||
|
@ -621,7 +620,7 @@ bool PosixExtendedParser::parse_root(ByteCode& stack, size_t& match_length_minim
|
||||||
|
|
||||||
bool ECMA262Parser::parse_internal(ByteCode& stack, size_t& match_length_minimum)
|
bool ECMA262Parser::parse_internal(ByteCode& stack, size_t& match_length_minimum)
|
||||||
{
|
{
|
||||||
if (m_parser_state.regex_options & AllFlags::Unicode) {
|
if (m_parser_state.regex_options.has_flag_set(AllFlags::Unicode)) {
|
||||||
return parse_pattern(stack, match_length_minimum, true, true);
|
return parse_pattern(stack, match_length_minimum, true, true);
|
||||||
} else {
|
} else {
|
||||||
ByteCode new_stack;
|
ByteCode new_stack;
|
||||||
|
@ -918,6 +917,13 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
|
||||||
|
|
||||||
bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
|
bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
|
||||||
{
|
{
|
||||||
|
if (match(TokenType::EscapeSequence)) {
|
||||||
|
// Also part of AtomEscape.
|
||||||
|
auto token = consume();
|
||||||
|
match_length_minimum += 1;
|
||||||
|
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token.value()[0] } });
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (try_skip("\\")) {
|
if (try_skip("\\")) {
|
||||||
// AtomEscape.
|
// AtomEscape.
|
||||||
return parse_atom_escape(stack, match_length_minimum, unicode, named);
|
return parse_atom_escape(stack, match_length_minimum, unicode, named);
|
||||||
|
@ -1035,11 +1041,20 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
||||||
}
|
}
|
||||||
|
|
||||||
// IdentityEscape
|
// IdentityEscape
|
||||||
if (match(TokenType::EscapeSequence)) {
|
for (auto ch : StringView { "^$\\.*+?()[]{}|" }) {
|
||||||
match_length_minimum += 1;
|
if (try_skip({ &ch, 1 })) {
|
||||||
auto token = consume().value();
|
match_length_minimum += 1;
|
||||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token[token.length() - 1] } });
|
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)ch } });
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unicode) {
|
||||||
|
if (try_skip("/")) {
|
||||||
|
match_length_minimum += 1;
|
||||||
|
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'/' } });
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (named && try_skip("k")) {
|
if (named && try_skip("k")) {
|
||||||
|
@ -1171,6 +1186,8 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
|
||||||
return { { .code_point = '\v', .is_character_class = false } };
|
return { { .code_point = '\v', .is_character_class = false } };
|
||||||
if (try_skip("b"))
|
if (try_skip("b"))
|
||||||
return { { .code_point = '\b', .is_character_class = false } };
|
return { { .code_point = '\b', .is_character_class = false } };
|
||||||
|
if (try_skip("/"))
|
||||||
|
return { { .code_point = '/', .is_character_class = false } };
|
||||||
|
|
||||||
// CharacterEscape > ControlLetter
|
// CharacterEscape > ControlLetter
|
||||||
if (try_skip("c")) {
|
if (try_skip("c")) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue