From 1a35e27490b7551717af943b9a798df848512a23 Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Sat, 25 Dec 2021 05:35:09 +0330 Subject: [PATCH] LibRegex: Make FailForks fail all forks up to the last save point This makes negative lookarounds with more than one fork behave correctly. Fixes #11350. --- Tests/LibRegex/Regex.cpp | 11 +++++++++++ Userland/Libraries/LibRegex/RegexByteCode.cpp | 13 +++++++++---- Userland/Libraries/LibRegex/RegexByteCode.h | 19 ++++++++----------- Userland/Libraries/LibRegex/RegexMatch.h | 2 ++ 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index d55e70afa0..b6d44acc5d 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -973,3 +973,14 @@ TEST_CASE(posix_basic_dollar_is_literal) EXPECT_EQ(re.match("123abc$", PosixFlags::Global).success, true); } } + +TEST_CASE(negative_lookahead) +{ + { + // Negative lookahead with more than 2 forks difference between lookahead init and finish. 
+ Regex re(":(?!\\^\\)|1)", ECMAScriptFlags::Global); + EXPECT_EQ(re.match(":^)").success, false); + EXPECT_EQ(re.match(":1").success, false); + EXPECT_EQ(re.match(":foobar").success, true); + } +} diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index 817b5599a2..6177a1f3e2 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -139,6 +139,7 @@ static void reverse_string_position(MatchState& state, RegexStringView view, siz static void save_string_position(MatchInput const& input, MatchState const& state) { input.saved_positions.append(state.string_position); + input.saved_forks_since_last_save.append(state.forks_since_last_save); input.saved_code_unit_positions.append(state.string_position_in_code_units); } @@ -149,6 +150,7 @@ static bool restore_string_position(MatchInput const& input, MatchState& state) state.string_position = input.saved_positions.take_last(); state.string_position_in_code_units = input.saved_code_unit_positions.take_last(); + state.forks_since_last_save = input.saved_forks_since_last_save.take_last(); return true; } @@ -207,6 +209,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, Matc ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state) const { save_string_position(input, state); + state.forks_since_last_save = 0; return ExecutionResult::Continue; } @@ -226,11 +229,9 @@ ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const& input, Ma return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState&) const +ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState& state) const { - VERIFY(count() > 0); - - input.fail_counter += count() - 1; + input.fail_counter += state.forks_since_last_save; return ExecutionResult::Failed_ExecuteLowPrioForks; } 
@@ -243,6 +244,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Jump::execute(MatchInput const&, MatchState ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state) const { state.fork_at_position = state.instruction_position + size() + offset(); + state.forks_since_last_save++; return ExecutionResult::Fork_PrioHigh; } @@ -250,12 +252,14 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceJump::execute(MatchInput const& { state.fork_at_position = state.instruction_position + size() + offset(); input.fork_to_replace = state.instruction_position; + state.forks_since_last_save++; return ExecutionResult::Fork_PrioHigh; } ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state) const { state.fork_at_position = state.instruction_position + size() + offset(); + state.forks_since_last_save++; return ExecutionResult::Fork_PrioLow; } @@ -263,6 +267,7 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceStay::execute(MatchInput const& { state.fork_at_position = state.instruction_position + size() + offset(); input.fork_to_replace = state.instruction_position; + state.forks_since_last_save++; return ExecutionResult::Fork_PrioLow; } diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h index f489576513..af4c155ef6 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.h +++ b/Userland/Libraries/LibRegex/RegexByteCode.h @@ -267,20 +267,19 @@ public: // JUMP _A // LABEL _L // REGEXP BODY - // FAIL 2 + // FAIL // LABEL _A // SAVE // FORKJUMP _L // RESTORE auto body_length = lookaround_body.size(); empend((ByteCodeValueType)OpCodeId::Jump); - empend((ByteCodeValueType)body_length + 2); // JUMP to label _A + empend((ByteCodeValueType)body_length + 1); // JUMP to label _A extend(move(lookaround_body)); empend((ByteCodeValueType)OpCodeId::FailForks); - empend((ByteCodeValueType)2); // Fail two forks empend((ByteCodeValueType)OpCodeId::Save); 
empend((ByteCodeValueType)OpCodeId::ForkJump); - empend((ByteCodeValueType) - (body_length + 5)); // JUMP to label _L + empend((ByteCodeValueType) - (body_length + 4)); // JUMP to label _L empend((ByteCodeValueType)OpCodeId::Restore); return; } @@ -300,22 +299,21 @@ public: // LABEL _L // GOBACK match_length(BODY) // REGEXP BODY - // FAIL 2 + // FAIL // LABEL _A // SAVE // FORKJUMP _L // RESTORE auto body_length = lookaround_body.size(); empend((ByteCodeValueType)OpCodeId::Jump); - empend((ByteCodeValueType)body_length + 4); // JUMP to label _A + empend((ByteCodeValueType)body_length + 3); // JUMP to label _A empend((ByteCodeValueType)OpCodeId::GoBack); empend((ByteCodeValueType)match_length); extend(move(lookaround_body)); empend((ByteCodeValueType)OpCodeId::FailForks); - empend((ByteCodeValueType)2); // Fail two forks empend((ByteCodeValueType)OpCodeId::Save); empend((ByteCodeValueType)OpCodeId::ForkJump); - empend((ByteCodeValueType) - (body_length + 7)); // JUMP to label _L + empend((ByteCodeValueType) - (body_length + 6)); // JUMP to label _L empend((ByteCodeValueType)OpCodeId::Restore); return; } @@ -593,9 +591,8 @@ class OpCode_FailForks final : public OpCode { public: ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; } - ALWAYS_INLINE size_t size() const override { return 2; } - ALWAYS_INLINE size_t count() const { return argument(0); } - String arguments_string() const override { return String::formatted("count={}", count()); } + ALWAYS_INLINE size_t size() const override { return 1; } + String arguments_string() const override { return String::empty(); } }; class OpCode_Save final : public OpCode { diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h index a274711bdc..846b24dec7 100644 --- a/Userland/Libraries/LibRegex/RegexMatch.h +++ b/Userland/Libraries/LibRegex/RegexMatch.h @@ -514,6 +514,7 @@ 
struct MatchInput { mutable size_t fail_counter { 0 }; mutable Vector<size_t> saved_positions; mutable Vector<size_t> saved_code_unit_positions; + mutable Vector<size_t> saved_forks_since_last_save; mutable HashMap<u64, u64> checkpoints; mutable Optional<size_t> fork_to_replace; }; @@ -524,6 +525,7 @@ struct MatchState { size_t string_position_in_code_units { 0 }; size_t instruction_position { 0 }; size_t fork_at_position { 0 }; + size_t forks_since_last_save { 0 }; Optional<size_t> initiating_fork; Vector<Match> matches; Vector<Vector<Match>> capture_group_matches;