mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 04:27:44 +00:00
LibRegex: Make append_alternation() significantly faster
...by flattening the underlying bytecode chunks first. Also avoid calling DisjointChunks::size() inside a loop. This is a very significant improvement in performance, making the compilation of a large regex with lots of alternatives take only ~100ms instead of many minutes (I ran out of patience waiting for it) :^)
This commit is contained in:
parent
66249612d6
commit
b8f03bb072
1 changed file with 10 additions and 4 deletions
|
@@ -32,6 +32,8 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
|
||||||
BasicBlockList block_boundaries;
|
BasicBlockList block_boundaries;
|
||||||
size_t end_of_last_block = 0;
|
size_t end_of_last_block = 0;
|
||||||
|
|
||||||
|
auto bytecode_size = bytecode.size();
|
||||||
|
|
||||||
MatchState state;
|
MatchState state;
|
||||||
state.instruction_position = 0;
|
state.instruction_position = 0;
|
||||||
auto check_jump = [&]<typename T>(OpCode const& opcode) {
|
auto check_jump = [&]<typename T>(OpCode const& opcode) {
|
||||||
|
@@ -88,14 +90,14 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
|
||||||
}
|
}
|
||||||
|
|
||||||
auto next_ip = state.instruction_position + opcode.size();
|
auto next_ip = state.instruction_position + opcode.size();
|
||||||
if (next_ip < bytecode.size())
|
if (next_ip < bytecode_size)
|
||||||
state.instruction_position = next_ip;
|
state.instruction_position = next_ip;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end_of_last_block < bytecode.size())
|
if (end_of_last_block < bytecode_size)
|
||||||
block_boundaries.append({ end_of_last_block, bytecode.size() });
|
block_boundaries.append({ end_of_last_block, bytecode_size });
|
||||||
|
|
||||||
quick_sort(block_boundaries, [](auto& a, auto& b) { return a.start < b.start; });
|
quick_sort(block_boundaries, [](auto& a, auto& b) { return a.start < b.start; });
|
||||||
|
|
||||||
|
@@ -382,6 +384,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
||||||
|
|
||||||
if (!needed_patches.is_empty()) {
|
if (!needed_patches.is_empty()) {
|
||||||
MatchState state;
|
MatchState state;
|
||||||
|
auto bytecode_size = bytecode.size();
|
||||||
state.instruction_position = 0;
|
state.instruction_position = 0;
|
||||||
struct Patch {
|
struct Patch {
|
||||||
ssize_t value;
|
ssize_t value;
|
||||||
|
@@ -389,7 +392,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
||||||
bool should_negate { false };
|
bool should_negate { false };
|
||||||
};
|
};
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (state.instruction_position >= bytecode.size())
|
if (state.instruction_position >= bytecode_size)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
auto& opcode = bytecode.get_opcode(state);
|
auto& opcode = bytecode.get_opcode(state);
|
||||||
|
@@ -471,6 +474,9 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&&
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
left.flatten();
|
||||||
|
right.flatten();
|
||||||
|
|
||||||
auto left_blocks = Regex<PosixBasicParser>::split_basic_blocks(left);
|
auto left_blocks = Regex<PosixBasicParser>::split_basic_blocks(left);
|
||||||
auto right_blocks = Regex<PosixBasicParser>::split_basic_blocks(right);
|
auto right_blocks = Regex<PosixBasicParser>::split_basic_blocks(right);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue