LibRegex: Convert regex::Lexer to inherit from GenericLexer

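This will allow regex::Lexer users to invoke GenericLexer consumption methods, such as GenericLexer::consume_escaped_codepoint(). This also allows for de-duplicating common methods between the lexers: regex::Lexer drops its own peek(), try_skip() and skip(), and callers in RegexParser.cpp use consume_specific() and consume() instead.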
2025-09-13 18:47:59 +00:00 · 2021-08-18 14:10:08 -04:00 · 2021-08-18 14:10:08 -04:00 · 5ff9596678
commit 5ff9596678
parent dd44a5e948
3 changed files with 28 additions and 62 deletions
--- a/Userland/Libraries/LibRegex/RegexLexer.cpp
+++ b/Userland/Libraries/LibRegex/RegexLexer.cpp
@@ -31,86 +31,58 @@ char const* Token::name() const
    return name(m_type);
 }
-Lexer::Lexer(StringView const source)
+Lexer::Lexer()
-    : m_source(source)
+    : GenericLexer(StringView { nullptr })
 {
 }
-ALWAYS_INLINE int Lexer::peek(size_t offset) const
+Lexer::Lexer(StringView const source)
    : GenericLexer(source)
 {
    if ((m_position + offset) >= m_source.length())
        return EOF;
    return (unsigned char)m_source[m_position + offset];
 }
 void Lexer::back(size_t offset)
 {
-    if (offset == m_position + 1)
+    if (offset == m_index + 1)
-        offset = m_position; // 'position == 0' occurs twice.
+        offset = m_index; // 'position == 0' occurs twice.
-    VERIFY(offset <= m_position);
+    VERIFY(offset <= m_index);
    if (!offset)
        return;
-    m_position -= offset;
+    m_index -= offset;
-    m_previous_position = (m_position > 0) ? m_position - 1 : 0;
+    m_previous_position = (m_index > 0) ? m_index - 1 : 0;
    m_current_char = m_source[m_position];
 }
-ALWAYS_INLINE void Lexer::consume()
+char Lexer::consume()
 {
-    m_previous_position = m_position;
+    m_previous_position = m_index;
-
+    return GenericLexer::consume();
    if (m_position >= m_source.length()) {
        m_position = m_source.length() + 1;
        m_current_char = EOF;
        return;
    }
    m_current_char = m_source[m_position++];
 }
 void Lexer::reset()
 {
-    m_position = 0;
+    m_index = 0;
    m_current_token = { TokenType::Eof, 0, StringView(nullptr) };
    m_current_char = 0;
    m_previous_position = 0;
 }
 bool Lexer::try_skip(char c)
 {
    if (peek() != c)
        return false;
    consume();
    return true;
 }
 char Lexer::skip()
 {
    auto c = peek();
    consume();
    VERIFY(c != EOF);
    return c;
 }
 Token Lexer::next()
 {
    size_t token_start_position;
    auto begin_token = [&] {
-        token_start_position = m_position;
+        token_start_position = m_index;
    };
    auto commit_token = [&](auto type) -> Token& {
-        VERIFY(token_start_position + m_previous_position - token_start_position + 1 <= m_source.length());
+        VERIFY(token_start_position + m_previous_position - token_start_position + 1 <= m_input.length());
-        auto substring = m_source.substring_view(token_start_position, m_previous_position - token_start_position + 1);
+        auto substring = m_input.substring_view(token_start_position, m_previous_position - token_start_position + 1);
        m_current_token = Token(type, token_start_position, substring);
        return m_current_token;
    };
    auto emit_token = [&](auto type) -> Token& {
-        m_current_token = Token(type, m_position, m_source.substring_view(m_position, 1));
+        m_current_token = Token(type, m_index, m_input.substring_view(m_index, 1));
        consume();
        return m_current_token;
    };
@@ -137,7 +109,7 @@ Token Lexer::next()
        }
    };
-    while (m_position <= m_source.length()) {
+    while (m_index <= m_input.length()) {
        auto ch = peek();
        if (ch == '(')
            return emit_token(TokenType::LeftParen);
@@ -203,13 +175,13 @@ Token Lexer::next()
            }
        }
-        if (ch == EOF)
+        if (ch == '\0')
            break;
        return emit_token(TokenType::Char);
    }
-    return Token(TokenType::Eof, m_position, nullptr);
+    return Token(TokenType::Eof, m_index, nullptr);
 }
 }
--- a/Userland/Libraries/LibRegex/RegexLexer.h
+++ b/Userland/Libraries/LibRegex/RegexLexer.h
@@ -7,6 +7,7 @@
 #pragma once
 #include <AK/Forward.h>
 #include <AK/GenericLexer.h>
 #include <AK/StringView.h>
 namespace regex {
@@ -63,27 +64,20 @@ private:
    StringView m_value { nullptr };
 };
-class Lexer {
+class Lexer : public GenericLexer {
 public:
-    Lexer() = default;
+    Lexer();
    explicit Lexer(StringView const source);
    Token next();
    void reset();
    void back(size_t offset);
-    void set_source(StringView const source) { m_source = source; }
+    char consume();
-    bool try_skip(char);
+    void set_source(StringView const source) { m_input = source; }
-    char skip();
+    auto const& source() const { return m_input; }
    auto const& source() const { return m_source; }
 private:
    ALWAYS_INLINE int peek(size_t offset = 0) const;
    ALWAYS_INLINE void consume();
    StringView m_source {};
    size_t m_position { 0 };
    size_t m_previous_position { 0 };
    Token m_current_token { TokenType::Eof, 0, StringView(nullptr) };
    int m_current_char { 0 };
 };
 }
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -101,7 +101,7 @@ ALWAYS_INLINE bool Parser::try_skip(StringView str)
    size_t potentially_go_back { 0 };
    for (auto ch : str) {
-        if (!m_parser_state.lexer.try_skip(ch)) {
+        if (!m_parser_state.lexer.consume_specific(ch)) {
            m_parser_state.lexer.back(potentially_go_back);
            return false;
        }
@@ -129,7 +129,7 @@ ALWAYS_INLINE char Parser::skip()
        ch = m_parser_state.current_token.value()[0];
    } else {
        m_parser_state.lexer.back(m_parser_state.current_token.value().length());
-        ch = m_parser_state.lexer.skip();
+        ch = m_parser_state.lexer.consume();
    }
    m_parser_state.current_token = m_parser_state.lexer.next();