diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index cb4daff96e..fe4611f6f9 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -160,6 +160,7 @@ static bool restore_string_position(MatchInput const& input, MatchState& state) OwnPtr ByteCode::s_opcodes[(size_t)OpCodeId::Last + 1]; bool ByteCode::s_opcodes_initialized { false }; +size_t ByteCode::s_next_checkpoint_serial_id { 0 }; void ByteCode::ensure_opcodes_initialized() { @@ -1063,15 +1064,14 @@ ALWAYS_INLINE ExecutionResult OpCode_ResetRepeat::execute(MatchInput const&, Mat ALWAYS_INLINE ExecutionResult OpCode_Checkpoint::execute(MatchInput const& input, MatchState& state) const { - input.checkpoints.set(state.instruction_position, state.string_position); + input.checkpoints.set(id(), state.string_position); return ExecutionResult::Continue; } ALWAYS_INLINE ExecutionResult OpCode_JumpNonEmpty::execute(MatchInput const& input, MatchState& state) const { u64 current_position = state.string_position; - auto checkpoint_ip = state.instruction_position + size() + checkpoint(); - auto checkpoint_position = input.checkpoints.find(checkpoint_ip); + auto checkpoint_position = input.checkpoints.find(checkpoint()); if (checkpoint_position != input.checkpoints.end() && checkpoint_position->value != current_position) { auto form = this->form(); diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h index 78a8b6cf6e..3ad660287e 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.h +++ b/Userland/Libraries/LibRegex/RegexByteCode.h @@ -397,12 +397,14 @@ public: // JUMP_NONEMPTY _C _START FORK // Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n) + auto checkpoint = s_next_checkpoint_serial_id++; new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint); + new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)checkpoint); auto jump_kind = static_cast(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay); new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty); - new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 1); // Jump to the last iteration - new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 1); // if _C is not empty. + new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 2); // Jump to the last iteration + new_bytecode.empend(checkpoint); // if _C is not empty. new_bytecode.empend(jump_kind); } @@ -440,11 +442,13 @@ public: // REGEXP // JUMP_NONEMPTY _C _START FORKSTAY (FORKJUMP -> Greedy) + auto checkpoint = s_next_checkpoint_serial_id++; + bytecode_to_repeat.prepend((ByteCodeValueType)checkpoint); bytecode_to_repeat.prepend((ByteCodeValueType)OpCodeId::Checkpoint); bytecode_to_repeat.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty); bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 3); // Jump to the _START label... - bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 2); // ...if _C is not empty + bytecode_to_repeat.empend(checkpoint); // ...if _C is not empty if (greedy) bytecode_to_repeat.empend(static_cast(OpCodeId::ForkJump)); @@ -469,16 +473,17 @@ public: else bytecode.empend(static_cast(OpCodeId::ForkJump)); - bytecode.empend(bytecode_to_repeat.size() + 1 + 4); // Jump to the _END label + bytecode.empend(bytecode_to_repeat.size() + 2 + 4); // Jump to the _END label - auto c_label = bytecode.size(); + auto checkpoint = s_next_checkpoint_serial_id++; bytecode.empend(static_cast(OpCodeId::Checkpoint)); + bytecode.empend(static_cast(checkpoint)); bytecode.extend(bytecode_to_repeat); bytecode.empend(static_cast(OpCodeId::JumpNonEmpty)); - bytecode.empend(-bytecode.size() - 3); // Jump(...) to the _START label... - bytecode.empend(c_label - bytecode.size() - 2); // ...only if _C passes. + bytecode.empend(-bytecode.size() - 3); // Jump(...) to the _START label... + bytecode.empend(checkpoint); // ...only if _C passes. bytecode.empend((ByteCodeValueType)OpCodeId::Jump); // LABEL _END = bytecode.size() @@ -507,6 +512,8 @@ public: OpCode& get_opcode(MatchState& state) const; + static void reset_checkpoint_serial_id() { s_next_checkpoint_serial_id = 0; } + private: void insert_string(StringView view) { @@ -519,6 +526,7 @@ private: ALWAYS_INLINE OpCode& get_opcode_by_id(OpCodeId id) const; static OwnPtr s_opcodes[(size_t)OpCodeId::Last + 1]; static bool s_opcodes_initialized; + static size_t s_next_checkpoint_serial_id; }; #define ENUMERATE_EXECUTION_RESULTS \ @@ -793,8 +801,9 @@ class OpCode_Checkpoint final : public OpCode { public: ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Checkpoint; } - ALWAYS_INLINE size_t size() const override { return 1; } - DeprecatedString arguments_string() const override { return DeprecatedString::empty(); } + ALWAYS_INLINE size_t size() const override { return 2; } + ALWAYS_INLINE size_t id() const { return argument(0); } + DeprecatedString arguments_string() const override { return DeprecatedString::formatted("id={}", id()); } }; class OpCode_JumpNonEmpty final : public OpCode { @@ -807,10 +816,10 @@ public: ALWAYS_INLINE OpCodeId form() const { return (OpCodeId)argument(2); } DeprecatedString arguments_string() const override { - return DeprecatedString::formatted("{} offset={} [&{}], cp={} [&{}]", + return DeprecatedString::formatted("{} offset={} [&{}], cp={}", opcode_id_name(form()), offset(), state().instruction_position + size() + offset(), - checkpoint(), state().instruction_position + size() + checkpoint()); + checkpoint()); } }; diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index 475e542e5f..38632fdfc4 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -179,6 +179,8 @@ ALWAYS_INLINE void Parser::reset() Parser::Result Parser::parse(Optional regex_options) { + ByteCode::reset_checkpoint_serial_id(); + reset(); if (regex_options.has_value()) m_parser_state.regex_options = regex_options.value();