1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 18:17:44 +00:00

LibRegex: Generate a search tree when patterns would benefit from it

This takes the previous alternation optimisation and applies it to all
the alternation blocks instead of just the few instructions at the
start.
By generating a trie of instructions, all logically equivalent
instructions will be consolidated into a single node, allowing the
engine to avoid checking the same thing multiple times.
For instance, given the pattern /abc|ac|ab/, this optimisation would
generate the following tree:
    - a
    | - b
    | | - c
    | | | - <accept>
    | | - <accept>
    | - c
    | | - <accept>
which will attempt to match 'a' or 'b' only once, and would also limit
the amount of backtracking performed in case an alternative fails to
match.

This optimisation is currently gated behind a simple cost model that
estimates the number of instructions generated, which is pessimistic for
small patterns, though the change in performance in such patterns is not
particularly large.
This commit is contained in:
Ali Mohammad Pur 2023-07-28 21:02:34 +03:30 committed by Andreas Kling
parent 18f4b6c670
commit 4e69eb89e8
4 changed files with 347 additions and 152 deletions

View file

@ -185,6 +185,23 @@ public:
Base::first_chunk().prepend(forward<T>(value));
}
// Appends every value in `value` to the last chunk, first creating an empty
// chunk if this container currently has none.
void append(Span<ByteCodeValueType const> value)
{
    if (is_empty())
        Base::append({});
    auto& last = Base::last_chunk();
    // The loop below appends without any capacity check, so the reservation must
    // cover the values already stored in the chunk plus the incoming ones.
    // NOTE(review): assumes ensure_capacity() takes an absolute capacity rather
    // than an additional amount — confirm against the chunk type.
    last.ensure_capacity(last.size() + value.size());
    for (auto v : value)
        last.unchecked_append(v);
}
// Forwards a capacity request to the last chunk, first creating an empty
// chunk if this container currently has none.
void ensure_capacity(size_t capacity)
{
    if (is_empty()) {
        Base::append({});
    }
    auto& tail_chunk = Base::last_chunk();
    tail_chunk.ensure_capacity(capacity);
}
// Explicitly deleted: these names cannot be called on this type. Code in this
// class reaches the underlying accessors as Base::first_chunk() / Base::last_chunk().
void last_chunk() const = delete;
void first_chunk() const = delete;
@ -210,20 +227,11 @@ public:
// Emits a Compare instruction with a single String argument built from `view`.
//
// NOTE(review): this body appears to contain BOTH sides of a diff hunk whose
// +/- markers were stripped: the first sequence assembles the instruction in
// temporary ByteCode objects and extend()s it in, the second empend()s the same
// fields directly. As written the instruction would be emitted twice — confirm
// against upstream which half is meant to remain.
void insert_bytecode_compare_string(StringView view)
{
// Variant 1: build the instruction in a temporary, sizing the arguments via arguments.size().
ByteCode bytecode;
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
bytecode.empend(static_cast<u64>(1)); // number of arguments
ByteCode arguments;
arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
arguments.insert_string(view);
bytecode.empend(arguments.size()); // size of arguments
bytecode.extend(move(arguments));
extend(move(bytecode));
// Variant 2: write the fields straight into this object, with the argument size
// precomputed as 2 + view.length().
// NOTE(review): presumably 1 for the compare type plus whatever insert_string()
// writes (length word + characters) — verify against insert_string().
empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
empend(static_cast<u64>(1)); // number of arguments
empend(2 + view.length()); // size of arguments
empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
insert_string(view);
}
void insert_bytecode_group_capture_left(size_t capture_groups_count)