diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 4063984232..22e91cb1c4 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -1049,6 +1049,8 @@ TEST_CASE(optimizer_alternation) Tuple { "a|"sv, "a"sv, 1u }, Tuple { "a|a|a|a|a|a|a|a|a|b"sv, "a"sv, 1u }, Tuple { "ab|ac|ad|bc"sv, "bc"sv, 2u }, + // Should not crash on backwards jumps introduced by '.*'. + Tuple { "\\bDroid\\b.*Build|XT912|XT928|XT926|XT915|XT919|XT925|XT1021|\\bMoto E\\b|XT1068|XT1092|XT1052"sv, "XT1068"sv, 6u }, }; for (auto& test : tests) { diff --git a/Userland/Libraries/LibRegex/RegexOptimizer.cpp b/Userland/Libraries/LibRegex/RegexOptimizer.cpp index b12f3034fd..956f1e51ab 100644 --- a/Userland/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Userland/Libraries/LibRegex/RegexOptimizer.cpp @@ -863,6 +863,8 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives Vector>> incoming_jump_edges_for_each_alternative; incoming_jump_edges_for_each_alternative.resize(alternatives.size()); + auto has_any_backwards_jump = false; + MatchState state; for (size_t i = 0; i < alternatives.size(); ++i) { @@ -882,24 +884,31 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives switch (opcode.opcode_id()) { case OpCodeId::Jump: incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= static_cast(opcode).offset() < 0; break; case OpCodeId::JumpNonEmpty: incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= static_cast(opcode).offset() < 0; break; case OpCodeId::ForkJump: incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= static_cast(opcode).offset() < 0; break; case OpCodeId::ForkStay: incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= static_cast(opcode).offset() < 0; break; case OpCodeId::ForkReplaceJump: incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= static_cast(opcode).offset() < 0; break; case OpCodeId::ForkReplaceStay: incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump |= static_cast(opcode).offset() < 0; break; case OpCodeId::Repeat: incoming_jump_edges.ensure(static_cast(opcode).offset() + state.instruction_position).append({ opcode_bytes }); + has_any_backwards_jump = true; break; default: break; @@ -1067,6 +1076,16 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives Queue nodes_to_visit; nodes_to_visit.enqueue(&trie); + HashMap>> instruction_positions; + if (has_any_backwards_jump) + MUST(instruction_positions.try_ensure_capacity(alternatives.size())); + + auto ip_mapping_for_alternative = [&](size_t i) -> RedBlackTree& { + return *instruction_positions.ensure(i, [] { + return make>(); + }); + }; + // each node: // node.re // forkjump child1 @@ -1089,6 +1108,11 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives state.instruction_position = target.size(); target.append(insn_bytes); + if (has_any_backwards_jump) { + for (auto& ip : node->metadata_value()) + ip_mapping_for_alternative(ip.alternative_index).insert(ip.instruction_position, state.instruction_position); + } + auto& opcode = target.get_opcode(state); ssize_t jump_offset; @@ -1124,8 +1148,31 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives if (is_jump) { VERIFY(node->has_metadata()); - auto& ip = node->metadata_value().first(); - patch_locations.append({ QualifiedIP { ip.alternative_index, ip.instruction_position + jump_offset + opcode.size() }, patch_location }); + QualifiedIP ip = node->metadata_value().first(); + auto intended_jump_ip = ip.instruction_position + jump_offset + opcode.size(); + if (jump_offset < 0 && intended_jump_ip > 0) { + VERIFY(has_any_backwards_jump); + // We should've already seen this instruction, so we can just patch it in. + auto& ip_mapping = ip_mapping_for_alternative(ip.alternative_index); + auto target_ip = ip_mapping.find(intended_jump_ip); + if (!target_ip) { + RegexDebug dbg; + size_t x = 0; + for (auto& entry : alternatives) { + warnln("----------- {} ----------", x++); + dbg.print_bytecode(entry); + } + + dbgln("Regex Tree / Unknown backwards jump: {}@{} -> {}", + ip.instruction_position, + ip.alternative_index, + intended_jump_ip); + VERIFY_NOT_REACHED(); + } + target[patch_location] = static_cast(*target_ip - patch_location - 1); + } else { + patch_locations.append({ QualifiedIP { ip.alternative_index, intended_jump_ip }, patch_location }); + } } }