mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 15:17:36 +00:00
LibRegex: Convert regex::Lexer to inherit from GenericLexer
This will allow regex::Lexer users to invoke GenericLexer consumption methods, such as GenericLexer::consume_escaped_codepoint(). This also allows for de-duplicating common methods between the lexers.
This commit is contained in:
parent
dd44a5e948
commit
5ff9596678
3 changed files with 28 additions and 62 deletions
|
@@ -31,86 +31,58 @@ char const* Token::name() const
|
||||||
return name(m_type);
|
return name(m_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
Lexer::Lexer(StringView const source)
|
Lexer::Lexer()
|
||||||
: m_source(source)
|
: GenericLexer(StringView { nullptr })
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE int Lexer::peek(size_t offset) const
|
Lexer::Lexer(StringView const source)
|
||||||
|
: GenericLexer(source)
|
||||||
{
|
{
|
||||||
if ((m_position + offset) >= m_source.length())
|
|
||||||
return EOF;
|
|
||||||
return (unsigned char)m_source[m_position + offset];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Lexer::back(size_t offset)
|
void Lexer::back(size_t offset)
|
||||||
{
|
{
|
||||||
if (offset == m_position + 1)
|
if (offset == m_index + 1)
|
||||||
offset = m_position; // 'position == 0' occurs twice.
|
offset = m_index; // 'position == 0' occurs twice.
|
||||||
|
|
||||||
VERIFY(offset <= m_position);
|
VERIFY(offset <= m_index);
|
||||||
if (!offset)
|
if (!offset)
|
||||||
return;
|
return;
|
||||||
m_position -= offset;
|
m_index -= offset;
|
||||||
m_previous_position = (m_position > 0) ? m_position - 1 : 0;
|
m_previous_position = (m_index > 0) ? m_index - 1 : 0;
|
||||||
m_current_char = m_source[m_position];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE void Lexer::consume()
|
char Lexer::consume()
|
||||||
{
|
{
|
||||||
m_previous_position = m_position;
|
m_previous_position = m_index;
|
||||||
|
return GenericLexer::consume();
|
||||||
if (m_position >= m_source.length()) {
|
|
||||||
m_position = m_source.length() + 1;
|
|
||||||
m_current_char = EOF;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_current_char = m_source[m_position++];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Lexer::reset()
|
void Lexer::reset()
|
||||||
{
|
{
|
||||||
m_position = 0;
|
m_index = 0;
|
||||||
m_current_token = { TokenType::Eof, 0, StringView(nullptr) };
|
m_current_token = { TokenType::Eof, 0, StringView(nullptr) };
|
||||||
m_current_char = 0;
|
|
||||||
m_previous_position = 0;
|
m_previous_position = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Lexer::try_skip(char c)
|
|
||||||
{
|
|
||||||
if (peek() != c)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
consume();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
char Lexer::skip()
|
|
||||||
{
|
|
||||||
auto c = peek();
|
|
||||||
consume();
|
|
||||||
VERIFY(c != EOF);
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
Token Lexer::next()
|
Token Lexer::next()
|
||||||
{
|
{
|
||||||
size_t token_start_position;
|
size_t token_start_position;
|
||||||
|
|
||||||
auto begin_token = [&] {
|
auto begin_token = [&] {
|
||||||
token_start_position = m_position;
|
token_start_position = m_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto commit_token = [&](auto type) -> Token& {
|
auto commit_token = [&](auto type) -> Token& {
|
||||||
VERIFY(token_start_position + m_previous_position - token_start_position + 1 <= m_source.length());
|
VERIFY(token_start_position + m_previous_position - token_start_position + 1 <= m_input.length());
|
||||||
auto substring = m_source.substring_view(token_start_position, m_previous_position - token_start_position + 1);
|
auto substring = m_input.substring_view(token_start_position, m_previous_position - token_start_position + 1);
|
||||||
m_current_token = Token(type, token_start_position, substring);
|
m_current_token = Token(type, token_start_position, substring);
|
||||||
return m_current_token;
|
return m_current_token;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto emit_token = [&](auto type) -> Token& {
|
auto emit_token = [&](auto type) -> Token& {
|
||||||
m_current_token = Token(type, m_position, m_source.substring_view(m_position, 1));
|
m_current_token = Token(type, m_index, m_input.substring_view(m_index, 1));
|
||||||
consume();
|
consume();
|
||||||
return m_current_token;
|
return m_current_token;
|
||||||
};
|
};
|
||||||
|
@@ -137,7 +109,7 @@ Token Lexer::next()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
while (m_position <= m_source.length()) {
|
while (m_index <= m_input.length()) {
|
||||||
auto ch = peek();
|
auto ch = peek();
|
||||||
if (ch == '(')
|
if (ch == '(')
|
||||||
return emit_token(TokenType::LeftParen);
|
return emit_token(TokenType::LeftParen);
|
||||||
|
@@ -203,13 +175,13 @@ Token Lexer::next()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ch == EOF)
|
if (ch == '\0')
|
||||||
break;
|
break;
|
||||||
|
|
||||||
return emit_token(TokenType::Char);
|
return emit_token(TokenType::Char);
|
||||||
}
|
}
|
||||||
|
|
||||||
return Token(TokenType::Eof, m_position, nullptr);
|
return Token(TokenType::Eof, m_index, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@@ -7,6 +7,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <AK/Forward.h>
|
#include <AK/Forward.h>
|
||||||
|
#include <AK/GenericLexer.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
|
||||||
namespace regex {
|
namespace regex {
|
||||||
|
@@ -63,27 +64,20 @@ private:
|
||||||
StringView m_value { nullptr };
|
StringView m_value { nullptr };
|
||||||
};
|
};
|
||||||
|
|
||||||
class Lexer {
|
class Lexer : public GenericLexer {
|
||||||
public:
|
public:
|
||||||
Lexer() = default;
|
Lexer();
|
||||||
explicit Lexer(StringView const source);
|
explicit Lexer(StringView const source);
|
||||||
Token next();
|
Token next();
|
||||||
void reset();
|
void reset();
|
||||||
void back(size_t offset);
|
void back(size_t offset);
|
||||||
void set_source(StringView const source) { m_source = source; }
|
char consume();
|
||||||
bool try_skip(char);
|
void set_source(StringView const source) { m_input = source; }
|
||||||
char skip();
|
auto const& source() const { return m_input; }
|
||||||
auto const& source() const { return m_source; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ALWAYS_INLINE int peek(size_t offset = 0) const;
|
|
||||||
ALWAYS_INLINE void consume();
|
|
||||||
|
|
||||||
StringView m_source {};
|
|
||||||
size_t m_position { 0 };
|
|
||||||
size_t m_previous_position { 0 };
|
size_t m_previous_position { 0 };
|
||||||
Token m_current_token { TokenType::Eof, 0, StringView(nullptr) };
|
Token m_current_token { TokenType::Eof, 0, StringView(nullptr) };
|
||||||
int m_current_char { 0 };
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@@ -101,7 +101,7 @@ ALWAYS_INLINE bool Parser::try_skip(StringView str)
|
||||||
|
|
||||||
size_t potentially_go_back { 0 };
|
size_t potentially_go_back { 0 };
|
||||||
for (auto ch : str) {
|
for (auto ch : str) {
|
||||||
if (!m_parser_state.lexer.try_skip(ch)) {
|
if (!m_parser_state.lexer.consume_specific(ch)) {
|
||||||
m_parser_state.lexer.back(potentially_go_back);
|
m_parser_state.lexer.back(potentially_go_back);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@@ -129,7 +129,7 @@ ALWAYS_INLINE char Parser::skip()
|
||||||
ch = m_parser_state.current_token.value()[0];
|
ch = m_parser_state.current_token.value()[0];
|
||||||
} else {
|
} else {
|
||||||
m_parser_state.lexer.back(m_parser_state.current_token.value().length());
|
m_parser_state.lexer.back(m_parser_state.current_token.value().length());
|
||||||
ch = m_parser_state.lexer.skip();
|
ch = m_parser_state.lexer.consume();
|
||||||
}
|
}
|
||||||
|
|
||||||
m_parser_state.current_token = m_parser_state.lexer.next();
|
m_parser_state.current_token = m_parser_state.lexer.next();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue