mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 18:17:44 +00:00
LibRegex: Generate a search tree when patterns would benefit from it
This takes the previous alternation optimisation and applies it to all the alternation blocks instead of just the few instructions at the start. By generating a trie of instructions, all logically equivalent instructions will be consolidated into a single node, allowing the engine to avoid checking the same thing multiple times. For instance, given the pattern /abc|ac|ab/, this optimisation would generate the following tree:
    - a
    | - b
    | | - c
    | | | - <accept>
    | | - <accept>
    | - c
    | | - <accept>
which will attempt to match 'a' or 'b' only once, and would also limit the number of backtrackings performed in case an alternative fails to match. This optimisation is currently gated behind a simple cost model that estimates the number of instructions generated, which is pessimistic for small patterns, though the change in performance in such patterns is not particularly large.
This commit is contained in:
parent
18f4b6c670
commit
4e69eb89e8
4 changed files with 347 additions and 152 deletions
|
@ -185,6 +185,23 @@ public:
|
|||
Base::first_chunk().prepend(forward<T>(value));
|
||||
}
|
||||
|
||||
// Appends every element of `value` to the last chunk, first creating an empty
// chunk when the container has none.
void append(Span<ByteCodeValueType const> value)
{
    if (is_empty())
        Base::append({});
    auto& last = Base::last_chunk();
    // Reserve room for what the chunk already holds plus the incoming values.
    // Reserving only value.size() leaves too little room once the chunk is
    // non-empty, and the loop below appends without a capacity check.
    // NOTE(review): assumes ensure_capacity() takes a total capacity (as
    // AK::Vector does); if it were additive this merely over-reserves.
    last.ensure_capacity(last.size() + value.size());
    for (auto v : value)
        last.unchecked_append(v);
}
|
||||
|
||||
void ensure_capacity(size_t capacity)
|
||||
{
|
||||
if (is_empty())
|
||||
Base::append({});
|
||||
Base::last_chunk().ensure_capacity(capacity);
|
||||
}
|
||||
|
||||
// Declares last_chunk() and first_chunk() as deleted in this class. The member
// functions in this file that need the chunks call the Base::-qualified
// versions instead.
// NOTE(review): presumably intended to stop outside callers from reaching
// individual chunks directly — confirm intent.
void last_chunk() const = delete;
|
||||
void first_chunk() const = delete;
|
||||
|
||||
|
@ -210,20 +227,11 @@ public:
|
|||
|
||||
// Emits a Compare instruction with a single String argument built from `view`.
// NOTE(review): this span comes from a diff hunk whose header reports 20 old
// lines and 11 new lines, so it appears to contain BOTH the removed and the
// added body without +/- markers; only one of the two halves below is part of
// the real function at any given revision.
void insert_bytecode_compare_string(StringView view)
|
||||
{
|
||||
// --- First half (appears to be the removed body): builds the instruction in
// temporary ByteCode objects and then appends the result to this object. ---
ByteCode bytecode;
|
||||
|
||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
|
||||
bytecode.empend(static_cast<u64>(1)); // number of arguments
|
||||
|
||||
ByteCode arguments;
|
||||
|
||||
arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
|
||||
arguments.insert_string(view);
|
||||
|
||||
bytecode.empend(arguments.size()); // size of arguments
|
||||
bytecode.extend(move(arguments));
|
||||
|
||||
extend(move(bytecode));
|
||||
// --- Second half (appears to be the added body): writes the same fields
// directly into this object, without the temporary ByteCode objects. ---
empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
|
||||
empend(static_cast<u64>(1)); // number of arguments
|
||||
// NOTE(review): 2 + length presumably counts the compare-type slot plus what
// insert_string() writes; insert_string() is not shown here — confirm.
empend(2 + view.length()); // size of arguments
|
||||
empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
|
||||
insert_string(view);
|
||||
}
|
||||
|
||||
void insert_bytecode_group_capture_left(size_t capture_groups_count)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue