1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 11:18:11 +00:00

LibRegex: Implement an ECMA262 Regex quirk with negative lookarounds

This implements the quirk defined by "Note 3" in section "Canonicalize"
(https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch).

Crosses off another quirk from #6042.
This commit is contained in:
Ali Mohammad Pur 2022-01-21 14:30:47 +03:30 committed by Ali Mohammad Pur
parent bfe8f312f3
commit c11be92e23
4 changed files with 54 additions and 16 deletions

View file

@ -10,6 +10,7 @@
#include <AK/AnyOf.h>
#include <AK/CharacterTypes.h>
#include <AK/GenericLexer.h>
#include <AK/ScopeGuard.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/StringUtils.h>
@ -1065,9 +1066,17 @@ bool ECMA262Parser::parse_assertion(ByteCode& stack, [[maybe_unused]] size_t& ma
return true;
}
// Negated lookahead branch: taken when forward assertions are allowed and a "!" is skipped.
if (should_parse_forward_assertion && try_skip("!")) {
// Open a capture-group scope before parsing the assertion body, so the clear
// below operates on the scope opened here.
enter_capture_group_scope();
// The guard's callback closes the scope again. Presumably it fires on every exit
// from this block, including the early `return false` (confirm against AK::ScopeGuard).
ScopeGuard quit_scope {
[this] {
exit_capture_group_scope();
}
};
// Parse the body into assertion_stack; a parse failure aborts this assertion.
if (!parse_inner_disjunction(assertion_stack, length_dummy, unicode, named))
return false;
stack.insert_bytecode_lookaround(move(assertion_stack), ByteCode::LookAroundType::NegatedLookAhead);
// Called after the lookaround bytecode has been inserted, while the scope is still open.
// NOTE(review): presumably emits a clear for each capture group registered in the
// current scope, per the ECMA262 negative-lookaround quirk — confirm against the helper.
clear_all_capture_groups_in_scope(stack);
return true;
}
if (m_should_use_browser_extended_grammar) {
@ -1086,9 +1095,16 @@ bool ECMA262Parser::parse_assertion(ByteCode& stack, [[maybe_unused]] size_t& ma
return true;
}
// Negated lookbehind branch: taken when "<!" is skipped.
if (try_skip("<!")) {
// Open a capture-group scope before parsing the assertion body, so the clear
// below operates on the scope opened here.
enter_capture_group_scope();
// The guard's callback closes the scope again. Presumably it fires on every exit
// from this block, including the early `return false` (confirm against AK::ScopeGuard).
ScopeGuard quit_scope {
[this] {
exit_capture_group_scope();
}
};
// Parse the body into assertion_stack; a parse failure aborts this assertion.
if (!parse_inner_disjunction(assertion_stack, length_dummy, unicode, named))
return false;
// The lookbehind form additionally passes length_dummy, which parse_inner_disjunction
// received above. NOTE(review): presumably the length of the inner match — confirm.
stack.insert_bytecode_lookaround(move(assertion_stack), ByteCode::LookAroundType::NegatedLookBehind, length_dummy);
// Called after the lookaround bytecode has been inserted, while the scope is still open.
// NOTE(review): presumably emits a clear for each capture group registered in the
// current scope, per the ECMA262 negative-lookaround quirk — confirm against the helper.
clear_all_capture_groups_in_scope(stack);
return true;
}
@ -1124,10 +1140,17 @@ bool ECMA262Parser::parse_quantifiable_assertion(ByteCode& stack, size_t&, bool
return true;
}
// Negated lookahead branch of the quantifiable-assertion parser: taken when "!" is skipped.
if (try_skip("!")) {
// Open a capture-group scope before parsing the assertion body, so the clear
// below operates on the scope opened here.
enter_capture_group_scope();
// The guard's callback closes the scope again. Presumably it fires on every exit
// from this block, including the early `return false` (confirm against AK::ScopeGuard).
ScopeGuard quit_scope {
[this] {
exit_capture_group_scope();
}
};
// Unlike the parse_assertion call sites, this passes match_length_minimum directly and
// hard-codes the third argument (`unicode` elsewhere) to false.
if (!parse_inner_disjunction(assertion_stack, match_length_minimum, false, named))
return false;
stack.insert_bytecode_lookaround(move(assertion_stack), ByteCode::LookAroundType::NegatedLookAhead);
// Called after the lookaround bytecode has been inserted, while the scope is still open.
// NOTE(review): presumably emits a clear for each capture group registered in the
// current scope, per the ECMA262 negative-lookaround quirk — confirm against the helper.
clear_all_capture_groups_in_scope(stack);
return true;
}
@ -2189,20 +2212,9 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
{
consume(TokenType::LeftParen, Error::InvalidPattern);
// Local helpers that manage m_capture_groups_in_scope, used here as a stack of scopes
// (each entry is a list of capture-group identifiers).
// NOTE(review): this listing comes from a diff view; the calls below that pass `stack`
// suggest some of these lambdas were replaced by member functions — confirm against the tree.

// Opens a new scope by appending a new entry to m_capture_groups_in_scope.
auto enter_capture_group_scope = [&] {
m_capture_groups_in_scope.empend();
};
// Closes the innermost scope and merges its identifiers into the enclosing scope.
// Relies on an enclosing scope still being present after take_last().
auto exit_capture_group_scope = [&] {
auto last = m_capture_groups_in_scope.take_last();
m_capture_groups_in_scope.last().extend(move(last));
};
// Records `identifier` in the innermost scope.
auto register_capture_group_in_current_scope = [&](auto identifier) {
m_capture_groups_in_scope.last().empend(identifier);
};
// For every identifier in the innermost scope, inserts a clear-capture-group
// instruction into `stack` (captured by reference).
auto clear_all_capture_groups_in_scope = [&] {
for (auto& index : m_capture_groups_in_scope.last())
stack.insert_bytecode_clear_capture_group(index);
};
if (match(TokenType::Questionmark)) {
// Non-capturing group or group with specifier.
@ -2216,7 +2228,7 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
enter_capture_group_scope();
if (!parse_disjunction(noncapture_group_bytecode, length, unicode, named))
return set_error(Error::InvalidPattern);
clear_all_capture_groups_in_scope();
clear_all_capture_groups_in_scope(stack);
exit_capture_group_scope();
consume(TokenType::RightParen, Error::MismatchingParen);
@ -2246,7 +2258,7 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
enter_capture_group_scope();
if (!parse_disjunction(capture_group_bytecode, length, unicode, named))
return set_error(Error::InvalidPattern);
clear_all_capture_groups_in_scope();
clear_all_capture_groups_in_scope(stack);
exit_capture_group_scope();
register_capture_group_in_current_scope(group_index);
@ -2277,7 +2289,7 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
if (!parse_disjunction(capture_group_bytecode, length, unicode, named))
return set_error(Error::InvalidPattern);
clear_all_capture_groups_in_scope();
clear_all_capture_groups_in_scope(stack);
exit_capture_group_scope();
register_capture_group_in_current_scope(group_index);