LibRegex: Make append_alternation() significantly faster

...by flattening the underlying bytecode chunks first. Also avoid calling DisjointChunks::size() inside a loop. This is a very significant improvement in performance, making the compilation of a large regex with lots of alternatives take only ~100ms instead of many minutes (I ran out of patience waiting for it) :^)
2025-09-14 09:08:01 +00:00 · 2021-12-21 18:08:15 +03:30 · 2021-12-21 18:08:15 +03:30 · b8f03bb072
commit b8f03bb072
parent 66249612d6
1 changed files with 10 additions and 4 deletions
--- a/Userland/Libraries/LibRegex/RegexOptimizer.cpp
+++ b/Userland/Libraries/LibRegex/RegexOptimizer.cpp
@@ -32,6 +32,8 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
    BasicBlockList block_boundaries;
    size_t end_of_last_block = 0;
+    auto bytecode_size = bytecode.size();
    MatchState state;
    state.instruction_position = 0;
    auto check_jump = [&]<typename T>(OpCode const& opcode) {
@@ -88,14 +90,14 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
        }
        auto next_ip = state.instruction_position + opcode.size();
-        if (next_ip < bytecode.size())
+        if (next_ip < bytecode_size)
            state.instruction_position = next_ip;
        else
            break;
    }
-    if (end_of_last_block < bytecode.size())
+    if (end_of_last_block < bytecode_size)
-        block_boundaries.append({ end_of_last_block, bytecode.size() });
+        block_boundaries.append({ end_of_last_block, bytecode_size });
    quick_sort(block_boundaries, [](auto& a, auto& b) { return a.start < b.start; });
@@ -382,6 +384,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
    if (!needed_patches.is_empty()) {
        MatchState state;
+        auto bytecode_size = bytecode.size();
        state.instruction_position = 0;
        struct Patch {
            ssize_t value;
@@ -389,7 +392,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
            bool should_negate { false };
        };
        for (;;) {
-            if (state.instruction_position >= bytecode.size())
+            if (state.instruction_position >= bytecode_size)
                break;
            auto& opcode = bytecode.get_opcode(state);
@@ -471,6 +474,9 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&&
        return;
    }
+    left.flatten();
+    right.flatten();
    auto left_blocks = Regex<PosixBasicParser>::split_basic_blocks(left);
    auto right_blocks = Regex<PosixBasicParser>::split_basic_blocks(right);