From b1ca2e5e39a9efa616e08d29a8af450c06d9b75e Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Wed, 14 Jun 2023 01:02:40 +0330 Subject: [PATCH] LibRegex: Do not treat repeats followed by fallthroughs as atomic --- Tests/LibRegex/Regex.cpp | 3 +++ Userland/Libraries/LibRegex/RegexOptimizer.cpp | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 44792ad4cc..f40c48ab55 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -995,6 +995,9 @@ TEST_CASE(optimizer_atomic_groups) Tuple { "[^x]+y"sv, "ay"sv, true }, // .+ should not be rewritten here, as it's followed by something that would be matched by `.`. Tuple { ".+(a|b|c)"sv, "xxa"sv, true }, + // (b+)(b+) produces an intermediate block with no matching ops, the optimiser should ignore that block when looking for following matches and correctly detect the overlap between (b+) and (b+). + // note that the second loop may be rewritten to a ForkReplace, but the first loop should not be rewritten. + Tuple { "(b+)(b+)"sv, "bbb"sv, true }, }; for (auto& test : tests) { diff --git a/Userland/Libraries/LibRegex/RegexOptimizer.cpp b/Userland/Libraries/LibRegex/RegexOptimizer.cpp index 337e3d3529..63cfcb453d 100644 --- a/Userland/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Userland/Libraries/LibRegex/RegexOptimizer.cpp @@ -351,7 +351,9 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi bool following_block_has_at_least_one_compare = false; // Find the first compare in the following block, it must NOT match any of the values in `repeated_values'. 
+ auto final_instruction = following_block.start; for (state.instruction_position = following_block.start; state.instruction_position < following_block.end;) { + final_instruction = state.instruction_position; auto& opcode = bytecode.get_opcode(state); switch (opcode.opcode_id()) { // Note: These have to exist since we're effectively repeating the following block as well @@ -399,6 +401,18 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi state.instruction_position += opcode.size(); } + // If the following block falls through, we can't rewrite it. + state.instruction_position = final_instruction; + switch (bytecode.get_opcode(state).opcode_id()) { + case OpCodeId::Jump: + case OpCodeId::JumpNonEmpty: + case OpCodeId::ForkJump: + case OpCodeId::ForkReplaceJump: + break; + default: + return AtomicRewritePreconditionResult::NotSatisfied; + } + if (following_block_has_at_least_one_compare) return AtomicRewritePreconditionResult::SatisfiedWithProperHeader; return AtomicRewritePreconditionResult::SatisfiedWithEmptyHeader; @@ -717,7 +731,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives size_t i = 0; for (auto& entry : alternatives) { - auto& blocks = basic_blocks[i]; + auto& blocks = basic_blocks[i++]; auto& block = blocks[block_index]; auto end = block_index + 1 == blocks.size() ? block.end : blocks[block_index + 1].start; state.instruction_position = block.start;