From af441bb939493671aa89db2f8f1ee6daf6868acc Mon Sep 17 00:00:00 2001
From: Ali Mohammad Pur
Date: Wed, 15 Feb 2023 10:12:03 +0330
Subject: [PATCH] LibRegex: Consider the inverse=true case when finding pattern overlap

Previously we were only checking for overlap when the range wasn't in
inverse mode, which made us miss things like /[^x]x/; this patch makes
it so we don't miss that.
---
 Tests/LibRegex/Regex.cpp                       | 2 ++
 Userland/Libraries/LibRegex/RegexOptimizer.cpp | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 4776528e13..82db3cc9c6 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -984,6 +984,8 @@ TEST_CASE(optimizer_atomic_groups)
         Tuple { "(1+)0"sv, "10"sv, true },
         // Rewrite should not skip over first required iteration of +.
         Tuple { "a+"sv, ""sv, false },
+        // 'y' and [^x] have an overlap ('y'), the loop should not be rewritten here.
+        Tuple { "[^x]+y"sv, "ay"sv, true },
     };

     for (auto& test : tests) {
diff --git a/Userland/Libraries/LibRegex/RegexOptimizer.cpp b/Userland/Libraries/LibRegex/RegexOptimizer.cpp
index 85879c8819..fbb55e4ac4 100644
--- a/Userland/Libraries/LibRegex/RegexOptimizer.cpp
+++ b/Userland/Libraries/LibRegex/RegexOptimizer.cpp
@@ -251,7 +251,7 @@ static bool has_overlap(Vector const& lhs, Vector const& lhs, Vector(pair.value)))
+            if (current_lhs_inversion_state() ^ char_class_contains(static_cast(pair.value)))
                 return true;
             break;
         case CharacterCompareType::CharRange: {
             auto range = CharRange(pair.value);
-            if (!current_lhs_inversion_state() && range_contains(range))
+            if (current_lhs_inversion_state() ^ range_contains(range))
                 return true;
             break;
         }