From 6a4c8a66aeba4b6ed0a91f2f0e9b7da42fe2b06f Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Wed, 9 Feb 2022 23:43:09 +0330 Subject: [PATCH] LibRegex: Only skip full instructions when optimizing alternations It makes no sense to skip half of an instruction, so make sure to skip only full instructions! --- Tests/LibRegex/Regex.cpp | 2 ++ Userland/Libraries/LibRegex/RegexOptimizer.cpp | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 7c4dfe80fe..7dbf8c6d3d 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -923,6 +923,8 @@ TEST_CASE(optimizer_atomic_groups) Tuple { "(a|)"sv, ""sv, true }, // Ensure that empty alternatives are not outright removed Tuple { "a{2,3}|a{5,8}"sv, "abc"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247. Tuple { "^(a{2,3}|a{5,8})$"sv, "aaaa"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247. + // Optimizer should not chop off *half* of an instruction when fusing instructions. + Tuple { "cubic-bezier\\(\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*\\)"sv, "cubic-bezier(.05, 0, 0, 1)"sv, true }, // ForkReplace shouldn't be applied where it would change the semantics Tuple { "(1+)\\1"sv, "11"sv, true }, Tuple { "(1+)1"sv, "11"sv, true }, diff --git a/Userland/Libraries/LibRegex/RegexOptimizer.cpp b/Userland/Libraries/LibRegex/RegexOptimizer.cpp index 8dd7335463..969f37143c 100644 --- a/Userland/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Userland/Libraries/LibRegex/RegexOptimizer.cpp @@ -484,7 +484,12 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&& if (left.spans().slice(left_block.start, left_end - left_block.start) != right.spans().slice(right_block.start, right_end - right_block.start)) break; - left_skip = left_end; + state.instruction_position = 0; + while (state.instruction_position < left_end) { + auto& opcode = left.get_opcode(state); + left_skip = state.instruction_position; + state.instruction_position += opcode.size(); + } } dbgln_if(REGEX_DEBUG, "Skipping {}/{} bytecode entries from {}/{}", left_skip, 0, left.size(), right.size());