From 98624fe03f047bc33002780bec2e37768c68e4b5 Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Tue, 31 Aug 2021 00:55:35 +0430 Subject: [PATCH] LibRegex: Implement min/max repetition using the Repeat bytecode This makes repetitions with large max bounds work correctly. Also fixes an OOM issue found by OSS-Fuzz: https://oss-fuzz.com/testcase?key=4725721980338176 --- Userland/Libraries/LibRegex/RegexByteCode.h | 29 ++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h index 926737fa8b..cb22806459 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.h +++ b/Userland/Libraries/LibRegex/RegexByteCode.h @@ -339,17 +339,34 @@ public: new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, repetition_mark_id); if (maximum.has_value()) { + // (REPEAT REGEXP MIN) + // LABEL _MAX_LOOP | + // FORK END | + // REGEXP | + // REPEAT _MAX_LOOP MAX-1 | if max > min + // REGEXP | + // FORK END | + // LABEL END | auto jump_kind = static_cast(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump); if (maximum.value() > minimum) { - auto diff = maximum.value() - minimum; new_bytecode.empend(jump_kind); - new_bytecode.empend(diff * (bytecode_to_repeat.size() + 2)); // Jump to the _END label - - for (T i = 0; i < diff; ++i) { + new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target. + auto pre_loop_fork_jump_index = new_bytecode.size(); + auto repetitions = maximum.value() - minimum; + dbgln("max {}, min {}, reps {}", *maximum, minimum, repetitions); + if (repetitions > 1) { new_bytecode.extend(bytecode_to_repeat); - new_bytecode.empend(jump_kind); - new_bytecode.empend((diff - i - 1) * (bytecode_to_repeat.size() + 2)); // Jump to the _END label + new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat); + new_bytecode.empend(bytecode_to_repeat.size() + 2); + new_bytecode.empend(static_cast(repetitions - 1)); + new_bytecode.empend(repetition_mark_id); } + new_bytecode.extend(bytecode_to_repeat); + new_bytecode.empend(jump_kind); + new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target. + auto post_loop_fork_jump_index = new_bytecode.size(); + new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(new_bytecode.size() - pre_loop_fork_jump_index); + new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(new_bytecode.size() - post_loop_fork_jump_index); } } else { // no maximum value set, repeat finding if possible