mirror of
https://github.com/RGBCube/serenity
synced 2025-06-01 10:08:10 +00:00
LibRegex: Bail out of atomic rewrite if a block doesn't contain compares
If a block jumps before performing a compare, we'd need to recursively find the first of the jumped-to block. While this is doable, it's not really worth spending the time as most such cases won't actually qualify for atomic loop rewrite anyway. Fixes an invalid rewrite when `.+` is followed by an alternation, e.g. /.+(a|b|c)/.
This commit is contained in:
parent
af441bb939
commit
7f530c0753
2 changed files with 21 additions and 1 deletions
|
@ -986,6 +986,8 @@ TEST_CASE(optimizer_atomic_groups)
|
|||
Tuple { "a+"sv, ""sv, false },
|
||||
// 'y' and [^x] have an overlap ('y'), the loop should not be rewritten here.
|
||||
Tuple { "[^x]+y"sv, "ay"sv, true },
|
||||
// .+ should not be rewritten here, as it's followed by something that would be matched by `.`.
|
||||
Tuple { ".+(a|b|c)"sv, "xxa"sv, true },
|
||||
};
|
||||
|
||||
for (auto& test : tests) {
|
||||
|
|
|
@ -303,10 +303,12 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
|
|||
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
||||
HashTable<size_t> active_capture_groups;
|
||||
MatchState state;
|
||||
auto has_seen_actionable_opcode = false;
|
||||
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
|
||||
auto& opcode = bytecode.get_opcode(state);
|
||||
switch (opcode.opcode_id()) {
|
||||
case OpCodeId::Compare: {
|
||||
has_seen_actionable_opcode = true;
|
||||
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
||||
if (repeated_values.is_empty() && any_of(compares, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
|
@ -315,6 +317,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
|
|||
}
|
||||
case OpCodeId::CheckBegin:
|
||||
case OpCodeId::CheckEnd:
|
||||
has_seen_actionable_opcode = true;
|
||||
if (repeated_values.is_empty())
|
||||
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
|
||||
break;
|
||||
|
@ -330,6 +333,13 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
|
|||
case OpCodeId::SaveLeftCaptureGroup:
|
||||
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
|
||||
break;
|
||||
case OpCodeId::ForkJump:
|
||||
case OpCodeId::ForkReplaceJump:
|
||||
case OpCodeId::JumpNonEmpty:
|
||||
// We could attempt to recursively resolve the follow set, but pretending that this just goes nowhere is faster.
|
||||
if (!has_seen_actionable_opcode)
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -375,6 +385,13 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
|
|||
case OpCodeId::CheckBoundary:
|
||||
// FIXME: What should we do with these? For now, consider them a failure.
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
case OpCodeId::ForkJump:
|
||||
case OpCodeId::ForkReplaceJump:
|
||||
case OpCodeId::JumpNonEmpty:
|
||||
// See note in the previous switch, same cases.
|
||||
if (!following_block_has_at_least_one_compare)
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -484,7 +501,8 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||
if (is_an_eligible_jump(opcode, state.instruction_position, forking_block.start, AlternateForm::DirectLoopWithoutHeader)) {
|
||||
// We've found RE0 (and RE1 is just the following block, if any), let's see if the precondition applies.
|
||||
// if RE1 is empty, there's no first(RE1), so this is an automatic pass.
|
||||
if (!fork_fallback_block.has_value() || fork_fallback_block->end == fork_fallback_block->start) {
|
||||
if (!fork_fallback_block.has_value()
|
||||
|| (fork_fallback_block->end == fork_fallback_block->start && block_satisfies_atomic_rewrite_precondition(bytecode, forking_block, *fork_fallback_block) != AtomicRewritePreconditionResult::NotSatisfied)) {
|
||||
candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeader });
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue