mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 05:27:43 +00:00
LibRegex: Make the optimizer understand references and capture groups
Otherwise the fork in patterns like `(1+)\1` would be (incorrectly) optimized away.
This commit is contained in:
parent
7de23aede2
commit
741886a4c4
2 changed files with 23 additions and 2 deletions
|
@ -898,6 +898,10 @@ TEST_CASE(optimizer_atomic_groups)
|
||||||
// Alternative fuse
|
// Alternative fuse
|
||||||
Tuple { "(abcfoo|abcbar|abcbaz).*x"sv, "abcbarx"sv, true },
|
Tuple { "(abcfoo|abcbar|abcbaz).*x"sv, "abcbarx"sv, true },
|
||||||
Tuple { "(a|a)"sv, "a"sv, true },
|
Tuple { "(a|a)"sv, "a"sv, true },
|
||||||
|
// ForkReplace shouldn't be applied where it would change the semantics
|
||||||
|
Tuple { "(1+)\\1"sv, "11"sv, true },
|
||||||
|
Tuple { "(1+)1"sv, "11"sv, true },
|
||||||
|
Tuple { "(1+)0"sv, "10"sv, true },
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto& test : tests) {
|
for (auto& test : tests) {
|
||||||
|
|
|
@ -104,6 +104,7 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks()
|
||||||
static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block)
|
static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block)
|
||||||
{
|
{
|
||||||
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
||||||
|
HashTable<size_t> active_capture_groups;
|
||||||
MatchState state;
|
MatchState state;
|
||||||
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
|
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
|
||||||
auto& opcode = bytecode.get_opcode(state);
|
auto& opcode = bytecode.get_opcode(state);
|
||||||
|
@ -126,6 +127,12 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
||||||
case OpCodeId::Restore:
|
case OpCodeId::Restore:
|
||||||
case OpCodeId::GoBack:
|
case OpCodeId::GoBack:
|
||||||
return false;
|
return false;
|
||||||
|
case OpCodeId::SaveRightCaptureGroup:
|
||||||
|
active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
|
||||||
|
break;
|
||||||
|
case OpCodeId::SaveLeftCaptureGroup:
|
||||||
|
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -133,19 +140,29 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
||||||
state.instruction_position += opcode.size();
|
state.instruction_position += opcode.size();
|
||||||
}
|
}
|
||||||
dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size());
|
dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size());
|
||||||
|
dbgln_if(REGEX_DEBUG, "Found {} active capture groups", active_capture_groups.size());
|
||||||
|
|
||||||
// Find the first compare in the following block, it must NOT match any of the values in `repeated_values'.
|
// Find the first compare in the following block, it must NOT match any of the values in `repeated_values'.
|
||||||
for (state.instruction_position = following_block.start; state.instruction_position < following_block.end;) {
|
for (state.instruction_position = following_block.start; state.instruction_position < following_block.end;) {
|
||||||
auto& opcode = bytecode.get_opcode(state);
|
auto& opcode = bytecode.get_opcode(state);
|
||||||
switch (opcode.opcode_id()) {
|
switch (opcode.opcode_id()) {
|
||||||
|
// Note: These have to exist since we're effectively repeating the following block as well
|
||||||
|
case OpCodeId::SaveRightCaptureGroup:
|
||||||
|
active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
|
||||||
|
break;
|
||||||
|
case OpCodeId::SaveLeftCaptureGroup:
|
||||||
|
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
|
||||||
|
break;
|
||||||
case OpCodeId::Compare: {
|
case OpCodeId::Compare: {
|
||||||
// We found a compare, let's see what it has.
|
// We found a compare, let's see what it has.
|
||||||
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
||||||
if (compares.is_empty())
|
if (compares.is_empty())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// If either side can match _anything_, fail.
|
if (any_of(compares, [&](auto& compare) {
|
||||||
if (any_of(compares, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
|
return compare.type == CharacterCompareType::AnyChar
|
||||||
|
|| (compare.type == CharacterCompareType::Reference && active_capture_groups.contains(compare.value));
|
||||||
|
}))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for (auto& repeated_value : repeated_values) {
|
for (auto& repeated_value : repeated_values) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue