diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h index c0e1c97ee5..cea60b9fcf 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.h +++ b/Userland/Libraries/LibRegex/RegexByteCode.h @@ -10,6 +10,7 @@ #include "RegexMatch.h" #include "RegexOptions.h" +#include #include #include #include @@ -139,7 +140,9 @@ struct CompareTypeAndValuePair { class OpCode; -class ByteCode : public Vector { +class ByteCode : public DisjointChunks { + using Base = DisjointChunks; + public: ByteCode() { @@ -150,6 +153,36 @@ public: virtual ~ByteCode() = default; ByteCode& operator=(ByteCode&&) = default; + ByteCode& operator=(Base&& value) + { + static_cast(*this) = move(value); + return *this; + } + + template + void empend(Args&&... args) + { + if (is_empty()) + Base::append({}); + Base::last_chunk().empend(forward(args)...); + } + template + void append(T&& value) + { + if (is_empty()) + Base::append({}); + Base::last_chunk().append(forward(value)); + } + template + void prepend(T&& value) + { + if (is_empty()) + return append(forward(value)); + Base::first_chunk().prepend(forward(value)); + } + + void last_chunk() const = delete; + void first_chunk() const = delete; void insert_bytecode_compare_values(Vector&& pairs) { @@ -309,7 +342,7 @@ public: VERIFY_NOT_REACHED(); } - void insert_bytecode_alternation(ByteCode left, ByteCode right) + void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right) { // FORKJUMP _ALT @@ -320,7 +353,7 @@ public: // LABEL _END // Optimisation: Eliminate extra work by unifying common pre-and-postfix exprs. - Optimizer::append_alternation(*this, left, right); + Optimizer::append_alternation(*this, move(left), move(right)); } template @@ -476,8 +509,7 @@ public: bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label - for (auto& op : bytecode_to_repeat) - bytecode.append(move(op)); + bytecode.extend(move(bytecode_to_repeat)); // LABEL _END = bytecode.size() bytecode_to_repeat = move(bytecode); diff --git a/Userland/Libraries/LibRegex/RegexBytecodeStreamOptimizer.h b/Userland/Libraries/LibRegex/RegexBytecodeStreamOptimizer.h index bc1c703402..7fff2809f5 100644 --- a/Userland/Libraries/LibRegex/RegexBytecodeStreamOptimizer.h +++ b/Userland/Libraries/LibRegex/RegexBytecodeStreamOptimizer.h @@ -12,7 +12,7 @@ namespace regex { class Optimizer { public: - static void append_alternation(ByteCode& target, ByteCode& left, ByteCode& right); + static void append_alternation(ByteCode& target, ByteCode&& left, ByteCode&& right); }; } diff --git a/Userland/Libraries/LibRegex/RegexOptimizer.cpp b/Userland/Libraries/LibRegex/RegexOptimizer.cpp index 5c63243ee5..d19c2178b0 100644 --- a/Userland/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Userland/Libraries/LibRegex/RegexOptimizer.cpp @@ -20,6 +20,8 @@ void Regex::run_optimization_passes() // Rewrite fork loops as atomic groups // e.g. a*b -> (ATOMIC a*)b attempt_rewrite_loops_as_atomic_groups(split_basic_blocks()); + + parser_result.bytecode.flatten(); } template @@ -413,7 +415,7 @@ void Regex::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const& } } -void Optimizer::append_alternation(ByteCode& target, ByteCode& left, ByteCode& right) +void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&& right) { if (left.is_empty()) { target.extend(right); @@ -433,7 +435,7 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode& left, ByteCode& r if (left_size != right_size) break; - if (left.span().slice(state.instruction_position, left_size) == right.span().slice(state.instruction_position, right_size)) + if (left.spans().slice(state.instruction_position, left_size) == right.spans().slice(state.instruction_position, right_size)) left_skip = state.instruction_position + left_size; else break; @@ -441,34 +443,30 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode& left, ByteCode& r state.instruction_position += left_size; } - // FIXME: Implement postfix unification too. - size_t right_skip = 0; + dbgln_if(REGEX_DEBUG, "Skipping {}/{} bytecode entries from {}/{}", left_skip, 0, left.size(), right.size()); - if (left_skip) - target.append(left.data(), left_skip); + if (left_skip) { + target.extend(left.release_slice(0, left_skip)); + right = right.release_slice(left_skip); + } - dbgln_if(REGEX_DEBUG, "Skipping {}/{} bytecode entries from {}/{}", left_skip, right_skip, left.size(), right.size()); - - auto left_slice = left.span().slice(left_skip, left.size() - left_skip - right_skip); - auto right_slice = right.span().slice(left_skip, right.size() - left_skip - right_skip); + auto left_size = left.size(); target.empend(static_cast(OpCodeId::ForkJump)); - target.empend(right_slice.size() + 2); // Jump to the _ALT label + target.empend(right.size() + (left_size ? 2 : 0)); // Jump to the _ALT label - target.append(right_slice.data(), right_slice.size()); + target.extend(move(right)); - if (!left_slice.is_empty()) { + if (left_size != 0) { target.empend(static_cast(OpCodeId::Jump)); - target.empend(left_slice.size()); // Jump to the _END label + target.empend(left.size()); // Jump to the _END label } // LABEL _ALT = bytecode.size() + 2 - target.append(left_slice.data(), left_slice.size()); + target.extend(move(left)); // LABEL _END = alterantive_bytecode.size - if (right_skip) - target.append(left.span().slice_from_end(right_skip).data(), right_skip); } template void Regex::run_optimization_passes();