1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 01:17:35 +00:00

LibRegex: Add support for the Basic POSIX regular expressions

This implements the internal regex stuff for #8506.
This commit is contained in:
Ali Mohammad Pur 2021-07-10 13:18:10 +04:30 committed by Andreas Kling
parent addfa1e82e
commit 54d89609de
3 changed files with 351 additions and 109 deletions

View file

@ -19,6 +19,7 @@
namespace regex {
// Forward declarations of the parser flavours: the ParserTraits
// specializations further down name these types before the classes
// themselves are defined.
class PosixExtendedParser;
class PosixBasicParser;
class ECMA262Parser;
template<typename T>
@ -34,6 +35,10 @@ template<>
struct ParserTraits<PosixExtendedParser> : public GenericParserTraits<PosixOptions> {
};
// Traits for the basic POSIX parser: it is configured through PosixOptions,
// the same option set that the extended POSIX parser's traits use.
// (A struct inherits publicly by default, so no access specifier is needed.)
template<>
struct ParserTraits<PosixBasicParser> : GenericParserTraits<PosixOptions> {
};
// Traits for the ECMA262 parser: it is configured through ECMAScriptOptions
// rather than the POSIX option set.
// (A struct inherits publicly by default, so no access specifier is needed.)
template<>
struct ParserTraits<ECMA262Parser> : GenericParserTraits<ECMAScriptOptions> {
};
@ -112,18 +117,62 @@ protected:
ParserState m_parser_state;
};
class PosixExtendedParser final : public Parser {
public:
explicit PosixExtendedParser(Lexer& lexer)
// Common base class of the POSIX parsers: both PosixBasicParser and
// PosixExtendedParser derive from it. Its constructors are protected, so it
// can only be constructed as the base of a derived parser.
class AbstractPosixParser : public Parser {
protected:
// Builds the parser on top of `lexer` without passing any options to Parser.
explicit AbstractPosixParser(Lexer& lexer)
: Parser(lexer)
{
}
// NOTE(review): the next line looks like the pre-change constructor signature
// left behind by the diff rendering (its replacement follows immediately) --
// confirm before treating it as live code.
PosixExtendedParser(Lexer& lexer, Optional<typename ParserTraits<PosixExtendedParser>::OptionsType> regex_options)
// Builds the parser with optional caller-supplied options; when
// `regex_options` holds no value, `{}` is forwarded to Parser instead.
// NOTE(review): the options type is spelled via ParserTraits<PosixExtendedParser>;
// the PosixBasicParser traits derive from the same GenericParserTraits<PosixOptions>,
// so presumably both resolve to the same type -- confirm.
AbstractPosixParser(Lexer& lexer, Optional<typename ParserTraits<PosixExtendedParser>::OptionsType> regex_options)
: Parser(lexer, regex_options.value_or({}))
{
}
// Bracket-expression parsing, declared in the shared base so that it is
// accessible from both derived POSIX parsers.
ALWAYS_INLINE bool parse_bracket_expression(Vector<CompareTypeAndValuePair>&, size_t&);
};
// Parser for the basic POSIX regular expression flavour, layered on the
// shared AbstractPosixParser base.
class PosixBasicParser final : public AbstractPosixParser {
public:
    // Parse from `lexer` without explicit options.
    explicit PosixBasicParser(Lexer& lexer)
        : AbstractPosixParser(lexer)
    {
    }

    // Parse from `lexer` with optional options; when `regex_options` holds no
    // value, `{}` is handed to the base class instead.
    PosixBasicParser(Lexer& lexer, Optional<ParserTraits<PosixBasicParser>::OptionsType> regex_options)
        : AbstractPosixParser(lexer, regex_options.value_or({}))
    {
    }

    ~PosixBasicParser() = default;

private:
    // Size of the per-capture-group bookkeeping arrays below.
    static constexpr size_t number_of_addressable_capture_groups = 9;

    // Parser state. The relative order of these non-static members is kept
    // as it was, so the object layout is unchanged.
    size_t m_capture_group { 0 };
    size_t m_capture_group_minimum_lengths[number_of_addressable_capture_groups] { 0 };
    bool m_capture_group_seen[number_of_addressable_capture_groups] { false };

    // Entry point overridden from the base parser interface.
    bool parse_internal(ByteCode&, size_t&) override;

    // Parsing steps; each takes the bytecode under construction and a size_t
    // that it may update.
    bool parse_root(ByteCode&, size_t&);
    bool parse_re_expression(ByteCode&, size_t&);
    bool parse_simple_re(ByteCode&, size_t&);
    bool parse_nonduplicating_re(ByteCode&, size_t&);
    bool parse_one_char_or_collation_element(ByteCode&, size_t&);
};
class PosixExtendedParser final : public AbstractPosixParser {
public:
explicit PosixExtendedParser(Lexer& lexer)
: AbstractPosixParser(lexer)
{
}
PosixExtendedParser(Lexer& lexer, Optional<typename ParserTraits<PosixExtendedParser>::OptionsType> regex_options)
: AbstractPosixParser(lexer, regex_options.value_or({}))
{
}
~PosixExtendedParser() = default;
private:
@ -197,9 +246,11 @@ private:
};
// Short aliases for the parser classes.
using PosixExtended = PosixExtendedParser;
using PosixBasic = PosixBasicParser;
using ECMA262 = ECMA262Parser;
}
// Make the short parser aliases usable without the regex:: qualifier.
using regex::ECMA262;
using regex::PosixBasic;
using regex::PosixExtended;