mirror of
https://github.com/RGBCube/serenity
synced 2025-07-24 16:37:35 +00:00
LibRegex: Correctly handle failing in the middle of explicit repeats
- Make sure that all the Repeat ops are reset (otherwise the operation would not be correct when going over the Repeat op a second time).
- Make sure that all matches that are allowed to fail are backed by a fork; otherwise the last failing fork would not have anywhere to return to.

Fixes #9707.
This commit is contained in:
parent
fcdd7aa990
commit
dd82c2e9b4
3 changed files with 59 additions and 24 deletions
|
@ -189,6 +189,9 @@ void ByteCode::ensure_opcodes_initialized()
|
||||||
case OpCodeId::Repeat:
|
case OpCodeId::Repeat:
|
||||||
s_opcodes[i] = make<OpCode_Repeat>();
|
s_opcodes[i] = make<OpCode_Repeat>();
|
||||||
break;
|
break;
|
||||||
|
case OpCodeId::ResetRepeat:
|
||||||
|
s_opcodes[i] = make<OpCode_ResetRepeat>();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s_opcodes_initialized = true;
|
s_opcodes_initialized = true;
|
||||||
|
@ -883,4 +886,13 @@ ALWAYS_INLINE ExecutionResult OpCode_Repeat::execute(MatchInput const&, MatchSta
|
||||||
return ExecutionResult::Continue;
|
return ExecutionResult::Continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sets the repetition counter identified by this opcode's id() back to zero,
// growing state.repetition_marks first if it has no slot for that id yet.
// The MatchInput argument is unused. Always returns ExecutionResult::Continue.
ALWAYS_INLINE ExecutionResult OpCode_ResetRepeat::execute(MatchInput const&, MatchState& state) const
{
    auto const mark_index = id();
    auto& marks = state.repetition_marks;

    // Make room for the slot before writing to it.
    if (marks.size() <= mark_index)
        marks.resize(mark_index + 1);

    marks.at(mark_index) = 0;
    return ExecutionResult::Continue;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,7 @@ using ByteCodeValueType = u64;
|
||||||
__ENUMERATE_OPCODE(GoBack) \
|
__ENUMERATE_OPCODE(GoBack) \
|
||||||
__ENUMERATE_OPCODE(ClearCaptureGroup) \
|
__ENUMERATE_OPCODE(ClearCaptureGroup) \
|
||||||
__ENUMERATE_OPCODE(Repeat) \
|
__ENUMERATE_OPCODE(Repeat) \
|
||||||
|
__ENUMERATE_OPCODE(ResetRepeat) \
|
||||||
__ENUMERATE_OPCODE(Exit)
|
__ENUMERATE_OPCODE(Exit)
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
@ -333,40 +334,46 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static void transform_bytecode_repetition_min_max(ByteCode& bytecode_to_repeat, T minimum, Optional<T> maximum, size_t repetition_mark_id, bool greedy = true) requires(IsIntegral<T>)
|
static void transform_bytecode_repetition_min_max(ByteCode& bytecode_to_repeat, T minimum, Optional<T> maximum, size_t min_repetition_mark_id, size_t max_repetition_mark_id, bool greedy = true) requires(IsIntegral<T>)
|
||||||
{
|
{
|
||||||
ByteCode new_bytecode;
|
ByteCode new_bytecode;
|
||||||
new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, repetition_mark_id);
|
new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, min_repetition_mark_id);
|
||||||
|
|
||||||
if (maximum.has_value()) {
|
if (maximum.has_value()) {
|
||||||
// (REPEAT REGEXP MIN)
|
// (REPEAT REGEXP MIN)
|
||||||
// LABEL _MAX_LOOP |
|
// LABEL _MAX_LOOP |
|
||||||
// FORK END |
|
// FORK END |
|
||||||
// REGEXP |
|
// REGEXP |
|
||||||
// REPEAT _MAX_LOOP MAX-1 | if max > min
|
// REPEAT _MAX_LOOP MAX-MIN | if max > min
|
||||||
// REGEXP |
|
// FORK END |
|
||||||
// FORK END |
|
// REGEXP |
|
||||||
// LABEL END |
|
// LABEL END |
|
||||||
|
// RESET _MAX_LOOP |
|
||||||
auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
|
auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
|
||||||
if (maximum.value() > minimum) {
|
if (maximum.value() > minimum) {
|
||||||
new_bytecode.empend(jump_kind);
|
new_bytecode.empend(jump_kind);
|
||||||
new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
|
new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
|
||||||
auto pre_loop_fork_jump_index = new_bytecode.size();
|
auto pre_loop_fork_jump_index = new_bytecode.size();
|
||||||
|
new_bytecode.extend(bytecode_to_repeat);
|
||||||
auto repetitions = maximum.value() - minimum;
|
auto repetitions = maximum.value() - minimum;
|
||||||
dbgln("max {}, min {}, reps {}", *maximum, minimum, repetitions);
|
auto fork_jump_address = new_bytecode.size();
|
||||||
if (repetitions > 1) {
|
if (repetitions > 1) {
|
||||||
new_bytecode.extend(bytecode_to_repeat);
|
|
||||||
new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat);
|
new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat);
|
||||||
new_bytecode.empend(bytecode_to_repeat.size() + 2);
|
new_bytecode.empend(bytecode_to_repeat.size() + 2);
|
||||||
new_bytecode.empend(static_cast<ByteCodeValueType>(repetitions - 1));
|
new_bytecode.empend(static_cast<ByteCodeValueType>(repetitions - 1));
|
||||||
new_bytecode.empend(repetition_mark_id);
|
new_bytecode.empend(max_repetition_mark_id);
|
||||||
|
new_bytecode.empend(jump_kind);
|
||||||
|
new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
|
||||||
|
auto post_loop_fork_jump_index = new_bytecode.size();
|
||||||
|
new_bytecode.extend(bytecode_to_repeat);
|
||||||
|
fork_jump_address = new_bytecode.size();
|
||||||
|
|
||||||
|
new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - post_loop_fork_jump_index);
|
||||||
|
|
||||||
|
new_bytecode.empend((ByteCodeValueType)OpCodeId::ResetRepeat);
|
||||||
|
new_bytecode.empend((ByteCodeValueType)max_repetition_mark_id);
|
||||||
}
|
}
|
||||||
new_bytecode.extend(bytecode_to_repeat);
|
new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
|
||||||
new_bytecode.empend(jump_kind);
|
|
||||||
new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
|
|
||||||
auto post_loop_fork_jump_index = new_bytecode.size();
|
|
||||||
new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(new_bytecode.size() - pre_loop_fork_jump_index);
|
|
||||||
new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(new_bytecode.size() - post_loop_fork_jump_index);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// no maximum value set, repeat finding if possible
|
// no maximum value set, repeat finding if possible
|
||||||
|
@ -724,6 +731,19 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class OpCode_ResetRepeat : public OpCode {
|
||||||
|
public:
|
||||||
|
ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
|
||||||
|
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ResetRepeat; }
|
||||||
|
ALWAYS_INLINE size_t size() const override { return 2; }
|
||||||
|
ALWAYS_INLINE size_t id() const { return argument(0); }
|
||||||
|
String const arguments_string() const override
|
||||||
|
{
|
||||||
|
auto reps = id() < state().repetition_marks.size() ? state().repetition_marks.at(id()) : 0;
|
||||||
|
return String::formatted("id={} rep={}", id(), reps + 1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
bool is(OpCode const&);
|
bool is(OpCode const&);
|
||||||
|
|
||||||
|
|
|
@ -446,8 +446,9 @@ bool PosixBasicParser::parse_simple_re(ByteCode& bytecode, size_t& match_length_
|
||||||
if (min_limit > s_maximum_repetition_count || (max_limit.has_value() && *max_limit > s_maximum_repetition_count))
|
if (min_limit > s_maximum_repetition_count || (max_limit.has_value() && *max_limit > s_maximum_repetition_count))
|
||||||
return set_error(Error::InvalidBraceContent);
|
return set_error(Error::InvalidBraceContent);
|
||||||
|
|
||||||
auto repetition_mark_id = m_parser_state.repetition_mark_count++;
|
auto min_repetition_mark_id = m_parser_state.repetition_mark_count++;
|
||||||
ByteCode::transform_bytecode_repetition_min_max(simple_re_bytecode, min_limit, max_limit, repetition_mark_id, true);
|
auto max_repetition_mark_id = m_parser_state.repetition_mark_count++;
|
||||||
|
ByteCode::transform_bytecode_repetition_min_max(simple_re_bytecode, min_limit, max_limit, min_repetition_mark_id, max_repetition_mark_id, true);
|
||||||
match_length_minimum += re_match_length_minimum * min_limit;
|
match_length_minimum += re_match_length_minimum * min_limit;
|
||||||
} else {
|
} else {
|
||||||
match_length_minimum += re_match_length_minimum;
|
match_length_minimum += re_match_length_minimum;
|
||||||
|
@ -620,8 +621,9 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco
|
||||||
maybe_maximum = value.value();
|
maybe_maximum = value.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto repetition_mark_id = m_parser_state.repetition_mark_count++;
|
auto min_repetition_mark_id = m_parser_state.repetition_mark_count++;
|
||||||
ByteCode::transform_bytecode_repetition_min_max(bytecode_to_repeat, minimum, maybe_maximum, repetition_mark_id);
|
auto max_repetition_mark_id = m_parser_state.repetition_mark_count++;
|
||||||
|
ByteCode::transform_bytecode_repetition_min_max(bytecode_to_repeat, minimum, maybe_maximum, min_repetition_mark_id, max_repetition_mark_id);
|
||||||
|
|
||||||
consume(TokenType::RightCurly, Error::MismatchingBrace);
|
consume(TokenType::RightCurly, Error::MismatchingBrace);
|
||||||
return !has_error();
|
return !has_error();
|
||||||
|
@ -1219,8 +1221,9 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
|
||||||
match_length_minimum = 0;
|
match_length_minimum = 0;
|
||||||
break;
|
break;
|
||||||
case Repetition::Explicit: {
|
case Repetition::Explicit: {
|
||||||
auto repetition_mark_id = m_parser_state.repetition_mark_count++;
|
auto min_repetition_mark_id = m_parser_state.repetition_mark_count++;
|
||||||
ByteCode::transform_bytecode_repetition_min_max(stack, repeat_min.value(), repeat_max, repetition_mark_id, !ungreedy);
|
auto max_repetition_mark_id = m_parser_state.repetition_mark_count++;
|
||||||
|
ByteCode::transform_bytecode_repetition_min_max(stack, repeat_min.value(), repeat_max, min_repetition_mark_id, max_repetition_mark_id, !ungreedy);
|
||||||
match_length_minimum *= repeat_min.value();
|
match_length_minimum *= repeat_min.value();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue