1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 06:27:45 +00:00

LibRegex: Make append_alternation() significantly faster

...by flattening the underlying bytecode chunks first.
Also avoid calling DisjointChunks::size() inside a loop.
This is a very significant performance improvement: compiling a large
regex with lots of alternatives now takes only ~100ms instead of many
minutes (I ran out of patience waiting for it) :^)
This commit is contained in:
Ali Mohammad Pur 2021-12-21 18:08:15 +03:30 committed by Andreas Kling
parent 66249612d6
commit b8f03bb072

View file

@ -32,6 +32,8 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
BasicBlockList block_boundaries;
size_t end_of_last_block = 0;
auto bytecode_size = bytecode.size();
MatchState state;
state.instruction_position = 0;
auto check_jump = [&]<typename T>(OpCode const& opcode) {
@ -88,14 +90,14 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
}
auto next_ip = state.instruction_position + opcode.size();
if (next_ip < bytecode.size())
if (next_ip < bytecode_size)
state.instruction_position = next_ip;
else
break;
}
if (end_of_last_block < bytecode.size())
block_boundaries.append({ end_of_last_block, bytecode.size() });
if (end_of_last_block < bytecode_size)
block_boundaries.append({ end_of_last_block, bytecode_size });
quick_sort(block_boundaries, [](auto& a, auto& b) { return a.start < b.start; });
@ -382,6 +384,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
if (!needed_patches.is_empty()) {
MatchState state;
auto bytecode_size = bytecode.size();
state.instruction_position = 0;
struct Patch {
ssize_t value;
@ -389,7 +392,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
bool should_negate { false };
};
for (;;) {
if (state.instruction_position >= bytecode.size())
if (state.instruction_position >= bytecode_size)
break;
auto& opcode = bytecode.get_opcode(state);
@ -471,6 +474,9 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&&
return;
}
left.flatten();
right.flatten();
auto left_blocks = Regex<PosixBasicParser>::split_basic_blocks(left);
auto right_blocks = Regex<PosixBasicParser>::split_basic_blocks(right);