1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 16:27:35 +00:00

LibRegex: Correct And/Or and inversion interplay semantics

This commit also fixes an incorrect test case from very early on; our
behaviour now matches the ECMA262 spec in this case.

Fixes #21786.
This commit is contained in:
Ali Mohammad Pur 2024-01-11 12:51:13 +03:30 committed by Andreas Kling
parent 89315787ae
commit e265d81277
3 changed files with 29 additions and 8 deletions

View file

@ -426,6 +426,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
bool active { false };
bool is_conjunction { false };
bool fail { false };
bool inverse_matched { false };
size_t initial_position;
size_t initial_code_unit_position;
Optional<size_t> last_accepted_position {};
@ -623,8 +624,9 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
case CharacterCompareType::And:
disjunction_states.append({
.active = true,
.is_conjunction = false,
.fail = false,
.is_conjunction = current_inversion_state(),
.fail = current_inversion_state(),
.inverse_matched = current_inversion_state(),
.initial_position = state.string_position,
.initial_code_unit_position = state.string_position_in_code_units,
});
@ -632,8 +634,9 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
case CharacterCompareType::Or:
disjunction_states.append({
.active = true,
.is_conjunction = true,
.fail = true,
.is_conjunction = !current_inversion_state(),
.fail = !current_inversion_state(),
.inverse_matched = !current_inversion_state(),
.initial_position = state.string_position,
.initial_code_unit_position = state.string_position_in_code_units,
});
@ -644,6 +647,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
state.string_position = disjunction_state.last_accepted_position.value_or(disjunction_state.initial_position);
state.string_position_in_code_units = disjunction_state.last_accepted_code_unit_position.value_or(disjunction_state.initial_code_unit_position);
}
inverse_matched = disjunction_state.inverse_matched || disjunction_state.fail;
break;
}
default:
@ -664,6 +668,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
if (!failed) {
new_disjunction_state.last_accepted_position = state.string_position;
new_disjunction_state.last_accepted_code_unit_position = state.string_position_in_code_units;
new_disjunction_state.inverse_matched |= inverse_matched;
}
if (new_disjunction_state.is_conjunction)
@ -673,6 +678,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
state.string_position = new_disjunction_state.initial_position;
state.string_position_in_code_units = new_disjunction_state.initial_code_unit_position;
inverse_matched = false;
}
}

View file

@ -1777,10 +1777,12 @@ bool ECMA262Parser::parse_character_class(ByteCode& stack, size_t& match_length_
Vector<CompareTypeAndValuePair> compares;
auto uses_explicit_or_semantics = false;
if (match(TokenType::Circumflex)) {
// Negated charclass
consume();
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Inverse, 0 });
uses_explicit_or_semantics = true;
}
// ClassContents :: [empty]
@ -1800,6 +1802,11 @@ bool ECMA262Parser::parse_character_class(ByteCode& stack, size_t& match_length_
if (flags.unicode_sets && !parse_class_set_expression(compares))
return false;
if (uses_explicit_or_semantics && compares.size() > 2) {
compares.insert(1, CompareTypeAndValuePair { CharacterCompareType::Or, 0 });
compares.empend(CompareTypeAndValuePair { CharacterCompareType::EndAndOr, 0 });
}
match_length_minimum += 1;
stack.insert_bytecode_compare_values(move(compares));
return true;
@ -2466,9 +2473,9 @@ DeprecatedFlyString ECMA262Parser::read_capture_group_specifier(bool take_starti
{
static auto id_start_category = Unicode::property_from_string("ID_Start"sv);
static auto id_continue_category = Unicode::property_from_string("ID_Continue"sv);
static constexpr const u32 REPLACEMENT_CHARACTER = 0xFFFD;
constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
static constexpr u32 const REPLACEMENT_CHARACTER = 0xFFFD;
constexpr u32 const ZERO_WIDTH_NON_JOINER { 0x200C };
constexpr u32 const ZERO_WIDTH_JOINER { 0x200D };
if (take_starting_angle_bracket && !consume("<"))
return {};