1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 19:37:35 +00:00

LibRegex: Make FailForks fail all forks up to the last save point

This makes negative lookarounds with more than one fork behave
correctly.
Fixes #11350.
This commit is contained in:
Ali Mohammad Pur 2021-12-25 05:35:09 +03:30 committed by Andreas Kling
parent 105d558922
commit 1a35e27490
4 changed files with 30 additions and 15 deletions

View file

@ -973,3 +973,14 @@ TEST_CASE(posix_basic_dollar_is_literal)
EXPECT_EQ(re.match("123abc$", PosixFlags::Global).success, true); EXPECT_EQ(re.match("123abc$", PosixFlags::Global).success, true);
} }
} }
TEST_CASE(negative_lookahead)
{
    // Regression case: the lookahead body is an alternation, so more than 2 forks
    // happen between the start of the negative lookahead and its end.
    Regex<ECMA262> re(":(?!\\^\\)|1)", ECMAScriptFlags::Global);

    // First alternative of the lookahead body ("^)") follows the colon: must not match.
    auto const caret_paren_result = re.match(":^)");
    EXPECT_EQ(caret_paren_result.success, false);

    // Second alternative of the lookahead body ("1") follows the colon: must not match.
    auto const digit_one_result = re.match(":1");
    EXPECT_EQ(digit_one_result.success, false);

    // Neither alternative follows the colon: the pattern is expected to match.
    auto const unrelated_suffix_result = re.match(":foobar");
    EXPECT_EQ(unrelated_suffix_result.success, true);
}

View file

@ -139,6 +139,7 @@ static void reverse_string_position(MatchState& state, RegexStringView view, siz
// Split-view diff rendering: a line that shows the same text twice is unchanged (old side, then
// new side); a line that shows its text once exists only on the new side, i.e. it was added.
//
// save_string_position(): appends the current position values from `state` onto the
// per-match stacks kept in `input`.
static void save_string_position(MatchInput const& input, MatchState const& state) static void save_string_position(MatchInput const& input, MatchState const& state)
{ {
// Record the current string position.
input.saved_positions.append(state.string_position); input.saved_positions.append(state.string_position);
// Added by this commit: also record the fork counter as it is at the time of the save;
// restore_string_position() assigns it back to state.forks_since_last_save via take_last().
input.saved_forks_since_last_save.append(state.forks_since_last_save);
// Record the current position expressed in code units.
input.saved_code_unit_positions.append(state.string_position_in_code_units); input.saved_code_unit_positions.append(state.string_position_in_code_units);
} }
@ -149,6 +150,7 @@ static bool restore_string_position(MatchInput const& input, MatchState& state)
state.string_position = input.saved_positions.take_last(); state.string_position = input.saved_positions.take_last();
state.string_position_in_code_units = input.saved_code_unit_positions.take_last(); state.string_position_in_code_units = input.saved_code_unit_positions.take_last();
state.forks_since_last_save = input.saved_forks_since_last_save.take_last();
return true; return true;
} }
@ -207,6 +209,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, Matc
// Split-view diff rendering: doubled text is an unchanged line (old side, then new side);
// text shown once is a line added by this commit.
//
// OpCode_Save::execute(): saves the current positions and then continues execution.
ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state) const ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state) const
{ {
// Push the current positions (and, on the new side, the current fork counter) onto the stacks in `input`.
save_string_position(input, state); save_string_position(input, state);
// Added by this commit: start counting forks from zero again, so the counter holds the
// number of forks executed since this save point.
state.forks_since_last_save = 0;
return ExecutionResult::Continue; return ExecutionResult::Continue;
} }
@ -226,11 +229,9 @@ ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const& input, Ma
return ExecutionResult::Continue; return ExecutionResult::Continue;
} }
// Split-view diff rendering: the left half of a line is the old side and the right half is the
// new side. This function's signature and body both changed, so the halves differ here.
//
// OpCode_FailForks::execute(): increases input.fail_counter and reports
// Failed_ExecuteLowPrioForks. Per the commit message, the new side fails all forks up to the
// last save point instead of a fixed number encoded in the bytecode.
// Old signature (left) leaves the MatchState parameter unnamed; new signature (right) names it
// `state` because the new body reads it.
ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState&) const ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState& state) const
{ {
// Left (removed): assert that the bytecode argument count() is positive.
// Right (added): add the number of forks counted since the last save to the fail counter.
VERIFY(count() > 0); input.fail_counter += state.forks_since_last_save;
// Removed by this commit (old side only): add the fixed bytecode argument minus one.
input.fail_counter += count() - 1;
// Unchanged. NOTE(review): how the matcher consumes fail_counter and this result code is not
// visible in this diff; confirm against the matcher loop.
return ExecutionResult::Failed_ExecuteLowPrioForks; return ExecutionResult::Failed_ExecuteLowPrioForks;
} }
@ -243,6 +244,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Jump::execute(MatchInput const&, MatchState
// Split-view diff rendering: doubled text is an unchanged line (old side, then new side);
// text shown once is a line added by this commit.
//
// OpCode_ForkJump::execute(): records the fork target in `state` and returns Fork_PrioHigh.
ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state) const ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state) const
{ {
// Fork target = position just past this instruction (instruction_position + size()) plus the encoded offset().
state.fork_at_position = state.instruction_position + size() + offset(); state.fork_at_position = state.instruction_position + size() + offset();
// Added by this commit: count this fork; OpCode_Save resets the counter and
// OpCode_FailForks adds it to input.fail_counter.
state.forks_since_last_save++;
return ExecutionResult::Fork_PrioHigh; return ExecutionResult::Fork_PrioHigh;
} }
@ -250,12 +252,14 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceJump::execute(MatchInput const&
{ {
state.fork_at_position = state.instruction_position + size() + offset(); state.fork_at_position = state.instruction_position + size() + offset();
input.fork_to_replace = state.instruction_position; input.fork_to_replace = state.instruction_position;
state.forks_since_last_save++;
return ExecutionResult::Fork_PrioHigh; return ExecutionResult::Fork_PrioHigh;
} }
// Split-view diff rendering: doubled text is an unchanged line (old side, then new side);
// text shown once is a line added by this commit.
//
// OpCode_ForkStay::execute(): records the fork target in `state` and returns Fork_PrioLow.
ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state) const ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state) const
{ {
// Fork target = position just past this instruction (instruction_position + size()) plus the encoded offset().
state.fork_at_position = state.instruction_position + size() + offset(); state.fork_at_position = state.instruction_position + size() + offset();
// Added by this commit: count this fork; OpCode_Save resets the counter and
// OpCode_FailForks adds it to input.fail_counter.
state.forks_since_last_save++;
return ExecutionResult::Fork_PrioLow; return ExecutionResult::Fork_PrioLow;
} }
@ -263,6 +267,7 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceStay::execute(MatchInput const&
{ {
state.fork_at_position = state.instruction_position + size() + offset(); state.fork_at_position = state.instruction_position + size() + offset();
input.fork_to_replace = state.instruction_position; input.fork_to_replace = state.instruction_position;
state.forks_since_last_save++;
return ExecutionResult::Fork_PrioLow; return ExecutionResult::Fork_PrioLow;
} }

View file

@ -267,20 +267,19 @@ public:
// JUMP _A // JUMP _A
// LABEL _L // LABEL _L
// REGEXP BODY // REGEXP BODY
// FAIL 2 // FAIL
// LABEL _A // LABEL _A
// SAVE // SAVE
// FORKJUMP _L // FORKJUMP _L
// RESTORE // RESTORE
auto body_length = lookaround_body.size(); auto body_length = lookaround_body.size();
empend((ByteCodeValueType)OpCodeId::Jump); empend((ByteCodeValueType)OpCodeId::Jump);
empend((ByteCodeValueType)body_length + 2); // JUMP to label _A empend((ByteCodeValueType)body_length + 1); // JUMP to label _A
extend(move(lookaround_body)); extend(move(lookaround_body));
empend((ByteCodeValueType)OpCodeId::FailForks); empend((ByteCodeValueType)OpCodeId::FailForks);
empend((ByteCodeValueType)2); // Fail two forks
empend((ByteCodeValueType)OpCodeId::Save); empend((ByteCodeValueType)OpCodeId::Save);
empend((ByteCodeValueType)OpCodeId::ForkJump); empend((ByteCodeValueType)OpCodeId::ForkJump);
empend((ByteCodeValueType) - (body_length + 5)); // JUMP to label _L empend((ByteCodeValueType) - (body_length + 4)); // JUMP to label _L
empend((ByteCodeValueType)OpCodeId::Restore); empend((ByteCodeValueType)OpCodeId::Restore);
return; return;
} }
@ -300,22 +299,21 @@ public:
// LABEL _L // LABEL _L
// GOBACK match_length(BODY) // GOBACK match_length(BODY)
// REGEXP BODY // REGEXP BODY
// FAIL 2 // FAIL
// LABEL _A // LABEL _A
// SAVE // SAVE
// FORKJUMP _L // FORKJUMP _L
// RESTORE // RESTORE
auto body_length = lookaround_body.size(); auto body_length = lookaround_body.size();
empend((ByteCodeValueType)OpCodeId::Jump); empend((ByteCodeValueType)OpCodeId::Jump);
empend((ByteCodeValueType)body_length + 4); // JUMP to label _A empend((ByteCodeValueType)body_length + 3); // JUMP to label _A
empend((ByteCodeValueType)OpCodeId::GoBack); empend((ByteCodeValueType)OpCodeId::GoBack);
empend((ByteCodeValueType)match_length); empend((ByteCodeValueType)match_length);
extend(move(lookaround_body)); extend(move(lookaround_body));
empend((ByteCodeValueType)OpCodeId::FailForks); empend((ByteCodeValueType)OpCodeId::FailForks);
empend((ByteCodeValueType)2); // Fail two forks
empend((ByteCodeValueType)OpCodeId::Save); empend((ByteCodeValueType)OpCodeId::Save);
empend((ByteCodeValueType)OpCodeId::ForkJump); empend((ByteCodeValueType)OpCodeId::ForkJump);
empend((ByteCodeValueType) - (body_length + 7)); // JUMP to label _L empend((ByteCodeValueType) - (body_length + 6)); // JUMP to label _L
empend((ByteCodeValueType)OpCodeId::Restore); empend((ByteCodeValueType)OpCodeId::Restore);
return; return;
} }
@ -593,9 +591,8 @@ class OpCode_FailForks final : public OpCode {
public: public:
ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; } ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; }
ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE size_t size() const override { return 1; }
ALWAYS_INLINE size_t count() const { return argument(0); } String arguments_string() const override { return String::empty(); }
String arguments_string() const override { return String::formatted("count={}", count()); }
}; };
class OpCode_Save final : public OpCode { class OpCode_Save final : public OpCode {

View file

@ -514,6 +514,7 @@ struct MatchInput {
mutable size_t fail_counter { 0 }; mutable size_t fail_counter { 0 };
mutable Vector<size_t> saved_positions; mutable Vector<size_t> saved_positions;
mutable Vector<size_t> saved_code_unit_positions; mutable Vector<size_t> saved_code_unit_positions;
mutable Vector<size_t> saved_forks_since_last_save;
mutable HashMap<u64, u64> checkpoints; mutable HashMap<u64, u64> checkpoints;
mutable Optional<size_t> fork_to_replace; mutable Optional<size_t> fork_to_replace;
}; };
@ -524,6 +525,7 @@ struct MatchState {
size_t string_position_in_code_units { 0 }; size_t string_position_in_code_units { 0 };
size_t instruction_position { 0 }; size_t instruction_position { 0 };
size_t fork_at_position { 0 }; size_t fork_at_position { 0 };
size_t forks_since_last_save { 0 };
Optional<size_t> initiating_fork; Optional<size_t> initiating_fork;
Vector<Match> matches; Vector<Match> matches;
Vector<Vector<Match>> capture_group_matches; Vector<Vector<Match>> capture_group_matches;