Mirror of https://github.com/RGBCube/serenity (synced 2025-07-25 02:17:35 +00:00)
LibRegex: Fix greedy/reluctant modifiers in PosixExtendedParser
Also fixes the issue with assertions causing early termination when they fail.
This commit is contained in:
Parent: 45e5661296
Commit: 92ea9ed4a5
5 changed files with 42 additions and 26 deletions
|
@ -188,26 +188,26 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(const MatchInput&, MatchS
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
|
ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
|
||||||
{
|
{
|
||||||
if (0 == state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
|
if (0 == state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
|
||||||
return ExecutionResult::Failed;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
|
|
||||||
if ((0 == state.string_position && !(input.regex_options & AllFlags::MatchNotBeginOfLine))
|
if ((0 == state.string_position && !(input.regex_options & AllFlags::MatchNotBeginOfLine))
|
||||||
|| (0 != state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
|
|| (0 != state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
|
||||||
|| (0 == state.string_position && (input.regex_options & AllFlags::Global)))
|
|| (0 == state.string_position && (input.regex_options & AllFlags::Global)))
|
||||||
return ExecutionResult::Continue;
|
return ExecutionResult::Continue;
|
||||||
|
|
||||||
return ExecutionResult::Failed;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
|
ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
|
||||||
{
|
{
|
||||||
if (state.string_position == input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine))
|
if (state.string_position == input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine))
|
||||||
return ExecutionResult::Failed;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
|
|
||||||
if ((state.string_position == input.view.length() && !(input.regex_options & AllFlags::MatchNotEndOfLine))
|
if ((state.string_position == input.view.length() && !(input.regex_options & AllFlags::MatchNotEndOfLine))
|
||||||
|| (state.string_position != input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine || input.regex_options & AllFlags::MatchNotBeginOfLine)))
|
|| (state.string_position != input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine || input.regex_options & AllFlags::MatchNotBeginOfLine)))
|
||||||
return ExecutionResult::Succeeded;
|
return ExecutionResult::Continue;
|
||||||
|
|
||||||
return ExecutionResult::Failed;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(const MatchInput& input, MatchState& state, MatchOutput& output) const
|
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(const MatchInput& input, MatchState& state, MatchOutput& output) const
|
||||||
|
|
|
@ -204,7 +204,7 @@ public:
|
||||||
void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
|
void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
|
||||||
{
|
{
|
||||||
|
|
||||||
// FORKSTAY _ALT
|
// FORKJUMP _ALT
|
||||||
// REGEXP ALT1
|
// REGEXP ALT1
|
||||||
// JUMP _END
|
// JUMP _END
|
||||||
// LABEL _ALT
|
// LABEL _ALT
|
||||||
|
@ -266,12 +266,12 @@ public:
|
||||||
{
|
{
|
||||||
// LABEL _START = -bytecode_to_repeat.size()
|
// LABEL _START = -bytecode_to_repeat.size()
|
||||||
// REGEXP
|
// REGEXP
|
||||||
// FORKJUMP _START (FORKSTAY -> Greedy)
|
// FORKSTAY _START (FORKJUMP -> Greedy)
|
||||||
|
|
||||||
if (greedy)
|
if (greedy)
|
||||||
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
|
||||||
else
|
|
||||||
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||||
|
else
|
||||||
|
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
||||||
|
|
||||||
bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label
|
bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label
|
||||||
}
|
}
|
||||||
|
@ -279,7 +279,7 @@ public:
|
||||||
void insert_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
|
void insert_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
|
||||||
{
|
{
|
||||||
// LABEL _START
|
// LABEL _START
|
||||||
// FORKSTAY _END (FORKJUMP -> Greedy)
|
// FORKJUMP _END (FORKSTAY -> Greedy)
|
||||||
// REGEXP
|
// REGEXP
|
||||||
// JUMP _START
|
// JUMP _START
|
||||||
// LABEL _END
|
// LABEL _END
|
||||||
|
@ -288,9 +288,9 @@ public:
|
||||||
ByteCode bytecode;
|
ByteCode bytecode;
|
||||||
|
|
||||||
if (greedy)
|
if (greedy)
|
||||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
|
||||||
else
|
|
||||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
||||||
|
else
|
||||||
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||||
|
|
||||||
bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label
|
bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label
|
||||||
|
|
||||||
|
@ -306,15 +306,15 @@ public:
|
||||||
|
|
||||||
void insert_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
|
void insert_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
|
||||||
{
|
{
|
||||||
// FORKSTAY _END (FORKJUMP -> Greedy)
|
// FORKJUMP _END (FORKSTAY -> Greedy)
|
||||||
// REGEXP
|
// REGEXP
|
||||||
// LABEL _END
|
// LABEL _END
|
||||||
ByteCode bytecode;
|
ByteCode bytecode;
|
||||||
|
|
||||||
if (greedy)
|
if (greedy)
|
||||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
|
||||||
else
|
|
||||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
||||||
|
else
|
||||||
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||||
|
|
||||||
bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
|
bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
|
||||||
|
|
||||||
|
|
|
@ -202,23 +202,23 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco
|
||||||
} else if (match(TokenType::Plus)) {
|
} else if (match(TokenType::Plus)) {
|
||||||
consume();
|
consume();
|
||||||
|
|
||||||
bool greedy = match(TokenType::Questionmark);
|
bool nongreedy = match(TokenType::Questionmark);
|
||||||
if (greedy)
|
if (nongreedy)
|
||||||
consume();
|
consume();
|
||||||
|
|
||||||
// Note: dont touch match_length_minimum, it's already correct
|
// Note: dont touch match_length_minimum, it's already correct
|
||||||
bytecode_to_repeat.insert_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
|
bytecode_to_repeat.insert_bytecode_repetition_min_one(bytecode_to_repeat, !nongreedy);
|
||||||
return !has_error();
|
return !has_error();
|
||||||
|
|
||||||
} else if (match(TokenType::Asterisk)) {
|
} else if (match(TokenType::Asterisk)) {
|
||||||
consume();
|
consume();
|
||||||
match_length_minimum = 0;
|
match_length_minimum = 0;
|
||||||
|
|
||||||
bool greedy = match(TokenType::Questionmark);
|
bool nongreedy = match(TokenType::Questionmark);
|
||||||
if (greedy)
|
if (nongreedy)
|
||||||
consume();
|
consume();
|
||||||
|
|
||||||
bytecode_to_repeat.insert_bytecode_repetition_any(bytecode_to_repeat, greedy);
|
bytecode_to_repeat.insert_bytecode_repetition_any(bytecode_to_repeat, !nongreedy);
|
||||||
|
|
||||||
return !has_error();
|
return !has_error();
|
||||||
|
|
||||||
|
@ -226,11 +226,11 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco
|
||||||
consume();
|
consume();
|
||||||
match_length_minimum = 0;
|
match_length_minimum = 0;
|
||||||
|
|
||||||
bool greedy = match(TokenType::Questionmark);
|
bool nongreedy = match(TokenType::Questionmark);
|
||||||
if (greedy)
|
if (nongreedy)
|
||||||
consume();
|
consume();
|
||||||
|
|
||||||
bytecode_to_repeat.insert_bytecode_repetition_zero_or_one(bytecode_to_repeat, greedy);
|
bytecode_to_repeat.insert_bytecode_repetition_zero_or_one(bytecode_to_repeat, !nongreedy);
|
||||||
return !has_error();
|
return !has_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -121,9 +121,15 @@ protected:
|
||||||
class PosixExtendedParser final : public Parser {
|
class PosixExtendedParser final : public Parser {
|
||||||
public:
|
public:
|
||||||
explicit PosixExtendedParser(Lexer& lexer)
|
explicit PosixExtendedParser(Lexer& lexer)
|
||||||
: Parser(lexer) {};
|
: Parser(lexer)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
PosixExtendedParser(Lexer& lexer, Optional<typename ParserTraits<PosixExtendedParser>::OptionsType> regex_options)
|
PosixExtendedParser(Lexer& lexer, Optional<typename ParserTraits<PosixExtendedParser>::OptionsType> regex_options)
|
||||||
: Parser(lexer, regex_options.value_or({})) {};
|
: Parser(lexer, regex_options.value_or({}))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
~PosixExtendedParser() = default;
|
~PosixExtendedParser() = default;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -334,6 +334,16 @@ TEST_CASE(match_all_character_class)
|
||||||
EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]);
|
EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(match_character_class_with_assertion)
|
||||||
|
{
|
||||||
|
Regex<PosixExtended> re("[[:alpha:]]+$");
|
||||||
|
String str = "abcdef";
|
||||||
|
RegexResult result = match(str, re);
|
||||||
|
|
||||||
|
EXPECT_EQ(result.success, true);
|
||||||
|
EXPECT_EQ(result.count, 1u);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE(example_for_git_commit)
|
TEST_CASE(example_for_git_commit)
|
||||||
{
|
{
|
||||||
Regex<PosixExtended> re("^.*$");
|
Regex<PosixExtended> re("^.*$");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue