From 92ea9ed4a538bb906c1047fd3e4ec4d6b5cf3520 Mon Sep 17 00:00:00 2001 From: AnotherTest Date: Thu, 19 Nov 2020 18:57:39 +0330 Subject: [PATCH] LibRegex: Fix greedy/reluctant modifiers in PosixExtendedParser Also fixes the issue with assertions causing early termination when they fail. --- Libraries/LibRegex/RegexByteCode.cpp | 10 +++++----- Libraries/LibRegex/RegexByteCode.h | 20 ++++++++++---------- Libraries/LibRegex/RegexParser.cpp | 18 +++++++++--------- Libraries/LibRegex/RegexParser.h | 10 ++++++++-- Libraries/LibRegex/Tests/Regex.cpp | 10 ++++++++++ 5 files changed, 42 insertions(+), 26 deletions(-) diff --git a/Libraries/LibRegex/RegexByteCode.cpp b/Libraries/LibRegex/RegexByteCode.cpp index a11bc4d3c8..42aab9120f 100644 --- a/Libraries/LibRegex/RegexByteCode.cpp +++ b/Libraries/LibRegex/RegexByteCode.cpp @@ -188,26 +188,26 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(const MatchInput&, MatchS ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(const MatchInput& input, MatchState& state, MatchOutput&) const { if (0 == state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine)) - return ExecutionResult::Failed; + return ExecutionResult::Failed_ExecuteLowPrioForks; if ((0 == state.string_position && !(input.regex_options & AllFlags::MatchNotBeginOfLine)) || (0 != state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine)) || (0 == state.string_position && (input.regex_options & AllFlags::Global))) return ExecutionResult::Continue; - return ExecutionResult::Failed; + return ExecutionResult::Failed_ExecuteLowPrioForks; } ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(const MatchInput& input, MatchState& state, MatchOutput&) const { if (state.string_position == input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine)) - return ExecutionResult::Failed; + return ExecutionResult::Failed_ExecuteLowPrioForks; if ((state.string_position == input.view.length() && !(input.regex_options & AllFlags::MatchNotEndOfLine)) || (state.string_position != input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine || input.regex_options & AllFlags::MatchNotBeginOfLine))) - return ExecutionResult::Succeeded; + return ExecutionResult::Continue; - return ExecutionResult::Failed; + return ExecutionResult::Failed_ExecuteLowPrioForks; } ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(const MatchInput& input, MatchState& state, MatchOutput& output) const diff --git a/Libraries/LibRegex/RegexByteCode.h b/Libraries/LibRegex/RegexByteCode.h index fce3b52b20..d468b74b37 100644 --- a/Libraries/LibRegex/RegexByteCode.h +++ b/Libraries/LibRegex/RegexByteCode.h @@ -204,7 +204,7 @@ public: void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right) { - // FORKSTAY _ALT + // FORKJUMP _ALT // REGEXP ALT1 // JUMP _END // LABEL _ALT @@ -266,12 +266,12 @@ public: { // LABEL _START = -bytecode_to_repeat.size() // REGEXP - // FORKJUMP _START (FORKSTAY -> Greedy) + // FORKSTAY _START (FORKJUMP -> Greedy) if (greedy) - bytecode_to_repeat.empend(static_cast(OpCodeId::ForkStay)); - else bytecode_to_repeat.empend(static_cast(OpCodeId::ForkJump)); + else + bytecode_to_repeat.empend(static_cast(OpCodeId::ForkStay)); bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label } @@ -279,7 +279,7 @@ public: void insert_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy) { // LABEL _START - // FORKSTAY _END (FORKJUMP -> Greedy) + // FORKJUMP _END (FORKSTAY -> Greedy) // REGEXP // JUMP _START // LABEL _END @@ -288,9 +288,9 @@ public: ByteCode bytecode; if (greedy) - bytecode.empend(static_cast(OpCodeId::ForkJump)); - else bytecode.empend(static_cast(OpCodeId::ForkStay)); + else + bytecode.empend(static_cast(OpCodeId::ForkJump)); bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label @@ -306,15 +306,15 @@ public: void insert_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy) { - // FORKSTAY _END (FORKJUMP -> Greedy) + // FORKJUMP _END (FORKSTAY -> Greedy) // REGEXP // LABEL _END ByteCode bytecode; if (greedy) - bytecode.empend(static_cast(OpCodeId::ForkJump)); - else bytecode.empend(static_cast(OpCodeId::ForkStay)); + else + bytecode.empend(static_cast(OpCodeId::ForkJump)); bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label diff --git a/Libraries/LibRegex/RegexParser.cpp b/Libraries/LibRegex/RegexParser.cpp index a4b1235e83..5d44c117d7 100644 --- a/Libraries/LibRegex/RegexParser.cpp +++ b/Libraries/LibRegex/RegexParser.cpp @@ -202,23 +202,23 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco } else if (match(TokenType::Plus)) { consume(); - bool greedy = match(TokenType::Questionmark); - if (greedy) + bool nongreedy = match(TokenType::Questionmark); + if (nongreedy) consume(); // Note: dont touch match_length_minimum, it's already correct - bytecode_to_repeat.insert_bytecode_repetition_min_one(bytecode_to_repeat, greedy); + bytecode_to_repeat.insert_bytecode_repetition_min_one(bytecode_to_repeat, !nongreedy); return !has_error(); } else if (match(TokenType::Asterisk)) { consume(); match_length_minimum = 0; - bool greedy = match(TokenType::Questionmark); - if (greedy) + bool nongreedy = match(TokenType::Questionmark); + if (nongreedy) consume(); - bytecode_to_repeat.insert_bytecode_repetition_any(bytecode_to_repeat, greedy); + bytecode_to_repeat.insert_bytecode_repetition_any(bytecode_to_repeat, !nongreedy); return !has_error(); @@ -226,11 +226,11 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco consume(); match_length_minimum = 0; - bool greedy = match(TokenType::Questionmark); - if (greedy) + bool nongreedy = match(TokenType::Questionmark); + if (nongreedy) consume(); - bytecode_to_repeat.insert_bytecode_repetition_zero_or_one(bytecode_to_repeat, greedy); + bytecode_to_repeat.insert_bytecode_repetition_zero_or_one(bytecode_to_repeat, !nongreedy); return !has_error(); } diff --git a/Libraries/LibRegex/RegexParser.h b/Libraries/LibRegex/RegexParser.h index a94dfa3a28..11d5b6fe3d 100644 --- a/Libraries/LibRegex/RegexParser.h +++ b/Libraries/LibRegex/RegexParser.h @@ -121,9 +121,15 @@ protected: class PosixExtendedParser final : public Parser { public: explicit PosixExtendedParser(Lexer& lexer) - : Parser(lexer) {}; + : Parser(lexer) + { + } + PosixExtendedParser(Lexer& lexer, Optional::OptionsType> regex_options) - : Parser(lexer, regex_options.value_or({})) {}; + : Parser(lexer, regex_options.value_or({})) + { + } + ~PosixExtendedParser() = default; private: diff --git a/Libraries/LibRegex/Tests/Regex.cpp b/Libraries/LibRegex/Tests/Regex.cpp index 8784cab0e4..12487d3590 100644 --- a/Libraries/LibRegex/Tests/Regex.cpp +++ b/Libraries/LibRegex/Tests/Regex.cpp @@ -334,6 +334,16 @@ TEST_CASE(match_all_character_class) EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]); } +TEST_CASE(match_character_class_with_assertion) +{ + Regex re("[[:alpha:]]+$"); + String str = "abcdef"; + RegexResult result = match(str, re); + + EXPECT_EQ(result.success, true); + EXPECT_EQ(result.count, 1u); +} + TEST_CASE(example_for_git_commit) { Regex re("^.*$");