
This parser will be used by the C++ language server to provide better autocomplete (and perhaps other features in the future). It is designed to be error-tolerant, and it keeps track of the position spans of the AST nodes, which should be useful later for incremental parsing.
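For orientation, here is a minimal usage sketch. This is hypothetical driver code, not part of this file; it assumes the Token type declared in Lexer.h exposes the type and start/end positions that the lexer below fills in:

    // Hypothetical example of driving the lexer:
    Cpp::Lexer lexer("int main() { return 0; }");
    Vector<Cpp::Token> tokens = lexer.lex();
    // Each Token carries a start and end Position (line, column),
    // which is the span information mentioned above.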
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "Lexer.h"
#include <AK/HashTable.h>
#include <AK/StdLibExtras.h>
#include <AK/String.h>
#include <ctype.h>

namespace Cpp {

Lexer::Lexer(const StringView& input)
    : m_input(input)
{
}

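// Returns the character `offset` positions ahead of the cursor,
// or 0 when that would read past the end of the input.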
char Lexer::peek(size_t offset) const
{
    if ((m_index + offset) >= m_input.length())
        return 0;
    return m_input[m_index + offset];
}

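// Consumes one character and updates the line/column tracking.
// m_previous_position remembers the position of the character just
// consumed, which commit_token() below uses as a token's end position.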
char Lexer::consume()
{
    ASSERT(m_index < m_input.length());
    char ch = m_input[m_index++];
    m_previous_position = m_position;
    if (ch == '\n') {
        m_position.line++;
        m_position.column = 0;
    } else {
        m_position.column++;
    }
    return ch;
}

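// Identifier rules: a letter, '_' or '$' may start an identifier;
// digits are additionally allowed after the first character.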
static bool is_valid_first_character_of_identifier(char ch)
{
    return isalpha(ch) || ch == '_' || ch == '$';
}

static bool is_valid_nonfirst_character_of_identifier(char ch)
{
    return is_valid_first_character_of_identifier(ch) || isdigit(ch);
}

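// C++ keywords, including the alternative operator spellings
// (and, bitor, xor_eq, ...) and the context-sensitive final/override.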
constexpr const char* s_known_keywords[] = {
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "class",
    "compl",
    "const",
    "const_cast",
    "constexpr",
    "continue",
    "decltype",
    "default",
    "delete",
    "do",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "final",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "override",
    "private",
    "protected",
    "public",
    "register",
    "reinterpret_cast",
    "return",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "using",
    "virtual",
    "volatile",
    "while",
    "xor",
    "xor_eq"
};

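// Names lexed as KnownType: AK container/utility types, builtin types,
// and the Serenity integer type aliases (i8..i64, u8..u64).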
constexpr const char* s_known_types[] = {
    "ByteBuffer",
    "CircularDeque",
    "CircularQueue",
    "Deque",
    "DoublyLinkedList",
    "FileSystemPath",
    "Array",
    "Function",
    "HashMap",
    "HashTable",
    "IPv4Address",
    "InlineLinkedList",
    "IntrusiveList",
    "JsonArray",
    "JsonObject",
    "JsonValue",
    "MappedFile",
    "NetworkOrdered",
    "NonnullOwnPtr",
    "NonnullOwnPtrVector",
    "NonnullRefPtr",
    "NonnullRefPtrVector",
    "Optional",
    "OwnPtr",
    "RefPtr",
    "Result",
    "ScopeGuard",
    "SinglyLinkedList",
    "String",
    "StringBuilder",
    "StringImpl",
    "StringView",
    "Utf8View",
    "Vector",
    "WeakPtr",
    "auto",
    "char",
    "char16_t",
    "char32_t",
    "char8_t",
    "double",
    "float",
    "i16",
    "i32",
    "i64",
    "i8",
    "int",
    "long",
    "short",
    "signed",
    "u16",
    "u32",
    "u64",
    "u8",
    "unsigned",
    "void",
    "wchar_t"
};

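// Both lookup tables are built lazily on first use and then reused.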
static bool is_keyword(const StringView& string)
{
    static HashTable<String> keywords(array_size(s_known_keywords));
    if (keywords.is_empty()) {
        keywords.set_from(s_known_keywords);
    }
    return keywords.contains(string);
}

static bool is_known_type(const StringView& string)
{
    static HashTable<String> types(array_size(s_known_types));
    if (types.is_empty()) {
        types.set_from(s_known_types);
    }
    return types.contains(string);
}

Vector<Token> Lexer::lex()
{
    Vector<Token> tokens;

    size_t token_start_index = 0;
    Position token_start_position;

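    // emit_token() is for single-character tokens: it records the current
    // position as both start and end, then consumes the character.
    // Multi-character tokens use begin_token()/commit_token() instead,
    // to capture a [start, end] span.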
    auto emit_token = [&](auto type) {
        Token token;
        token.m_type = type;
        token.m_start = m_position;
        token.m_end = m_position;
        tokens.append(token);
        consume();
    };

    auto begin_token = [&] {
        token_start_index = m_index;
        token_start_position = m_position;
    };

    auto commit_token = [&](auto type) {
        Token token;
        token.m_type = type;
        token.m_start = token_start_position;
        token.m_end = m_previous_position;
        tokens.append(token);
    };

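    // Helper for operators that also have a compound-assignment form,
    // e.g. '*' / '*=' or '^' / '^='.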
    auto emit_token_equals = [&](auto type, auto equals_type) {
        if (peek(1) == '=') {
            begin_token();
            consume();
            consume();
            commit_token(equals_type);
            return;
        }
        emit_token(type);
    };

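    // Called with the cursor on a backslash; returns the total length of
    // the escape sequence (including the backslash): simple escapes, octal
    // (up to three digits), \x hex, and \u / \U unicode escapes.
    // Returns 0 if the sequence is not a valid escape.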
    auto match_escape_sequence = [&]() -> size_t {
        switch (peek(1)) {
        case '\'':
        case '"':
        case '?':
        case '\\':
        case 'a':
        case 'b':
        case 'f':
        case 'n':
        case 'r':
        case 't':
        case 'v':
            return 2;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
            size_t octal_digits = 1;
            for (size_t i = 0; i < 2; ++i) {
                char next = peek(2 + i);
                if (next < '0' || next > '7')
                    break;
                ++octal_digits;
            }
            return 1 + octal_digits;
        }
        case 'x': {
            size_t hex_digits = 0;
            while (isxdigit(peek(2 + hex_digits)))
                ++hex_digits;
            return 2 + hex_digits;
        }
        case 'u':
        case 'U': {
            bool is_unicode = true;
            size_t number_of_digits = peek(1) == 'u' ? 4 : 8;
            for (size_t i = 0; i < number_of_digits; ++i) {
                if (!isxdigit(peek(2 + i))) {
                    is_unicode = false;
                    break;
                }
            }
            return is_unicode ? 2 + number_of_digits : 0;
        }
        default:
            return 0;
        }
    };

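    // Returns the length of a string/char literal prefix ending in `quote`,
    // accounting for the L, u, u8 and U encoding prefixes; 0 if no match.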
    auto match_string_prefix = [&](char quote) -> size_t {
        if (peek() == quote)
            return 1;
        if (peek() == 'L' && peek(1) == quote)
            return 2;
        if (peek() == 'u') {
            if (peek(1) == quote)
                return 2;
            if (peek(1) == '8' && peek(2) == quote)
                return 3;
        }
        if (peek() == 'U' && peek(1) == quote)
            return 2;
        return 0;
    };

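    // Main dispatch loop: each branch below lexes one token (or a short
    // run of related tokens, e.g. for #include or strings with escapes)
    // and then continues with the next character.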
    while (m_index < m_input.length()) {
        auto ch = peek();
        if (isspace(ch)) {
            begin_token();
            while (isspace(peek()))
                consume();
            commit_token(Token::Type::Whitespace);
            continue;
        }
        if (ch == '(') {
            emit_token(Token::Type::LeftParen);
            continue;
        }
        if (ch == ')') {
            emit_token(Token::Type::RightParen);
            continue;
        }
        if (ch == '{') {
            emit_token(Token::Type::LeftCurly);
            continue;
        }
        if (ch == '}') {
            emit_token(Token::Type::RightCurly);
            continue;
        }
        if (ch == '[') {
            emit_token(Token::Type::LeftBracket);
            continue;
        }
        if (ch == ']') {
            emit_token(Token::Type::RightBracket);
            continue;
        }
        if (ch == '<') {
            begin_token();
            consume();
            if (peek() == '<') {
                consume();
                if (peek() == '=') {
                    consume();
                    commit_token(Token::Type::LessLessEquals);
                    continue;
                }
                commit_token(Token::Type::LessLess);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::LessEquals);
                continue;
            }
            if (peek() == '>') {
                consume();
                commit_token(Token::Type::LessGreater);
                continue;
            }
            commit_token(Token::Type::Less);
            continue;
        }
        if (ch == '>') {
            begin_token();
            consume();
            if (peek() == '>') {
                consume();
                if (peek() == '=') {
                    consume();
                    commit_token(Token::Type::GreaterGreaterEquals);
                    continue;
                }
                commit_token(Token::Type::GreaterGreater);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::GreaterEquals);
                continue;
            }
            commit_token(Token::Type::Greater);
            continue;
        }
        if (ch == ',') {
            emit_token(Token::Type::Comma);
            continue;
        }
        if (ch == '+') {
            begin_token();
            consume();
            if (peek() == '+') {
                consume();
                commit_token(Token::Type::PlusPlus);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::PlusEquals);
                continue;
            }
            commit_token(Token::Type::Plus);
            continue;
        }
        if (ch == '-') {
            begin_token();
            consume();
            if (peek() == '-') {
                consume();
                commit_token(Token::Type::MinusMinus);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::MinusEquals);
                continue;
            }
            if (peek() == '>') {
                consume();
                if (peek() == '*') {
                    consume();
                    commit_token(Token::Type::ArrowAsterisk);
                    continue;
                }
                commit_token(Token::Type::Arrow);
                continue;
            }
            commit_token(Token::Type::Minus);
            continue;
        }
        if (ch == '*') {
            emit_token_equals(Token::Type::Asterisk, Token::Type::AsteriskEquals);
            continue;
        }
        if (ch == '%') {
            emit_token_equals(Token::Type::Percent, Token::Type::PercentEquals);
            continue;
        }
        if (ch == '^') {
            emit_token_equals(Token::Type::Caret, Token::Type::CaretEquals);
            continue;
        }
        if (ch == '!') {
            emit_token_equals(Token::Type::ExclamationMark, Token::Type::ExclamationMarkEquals);
            continue;
        }
        if (ch == '=') {
            emit_token_equals(Token::Type::Equals, Token::Type::EqualsEquals);
            continue;
        }
        if (ch == '&') {
            begin_token();
            consume();
            if (peek() == '&') {
                consume();
                commit_token(Token::Type::AndAnd);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::AndEquals);
                continue;
            }
            commit_token(Token::Type::And);
            continue;
        }
        if (ch == '|') {
            begin_token();
            consume();
            if (peek() == '|') {
                consume();
                commit_token(Token::Type::PipePipe);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::PipeEquals);
                continue;
            }
            commit_token(Token::Type::Pipe);
            continue;
        }
        if (ch == '~') {
            emit_token(Token::Type::Tilde);
            continue;
        }
        if (ch == '?') {
            emit_token(Token::Type::QuestionMark);
            continue;
        }
        if (ch == ':') {
            begin_token();
            consume();
            if (peek() == ':') {
                consume();
                if (peek() == '*') {
                    consume();
                    commit_token(Token::Type::ColonColonAsterisk);
                    continue;
                }
                commit_token(Token::Type::ColonColon);
                continue;
            }
            commit_token(Token::Type::Colon);
            continue;
        }
        if (ch == ';') {
            emit_token(Token::Type::Semicolon);
            continue;
        }
        if (ch == '.') {
            begin_token();
            consume();
            if (peek() == '*') {
                consume();
                commit_token(Token::Type::DotAsterisk);
                continue;
            }
            commit_token(Token::Type::Dot);
            continue;
        }
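        // Preprocessor handling: '#include' is split into IncludeStatement,
        // Whitespace and IncludePath tokens so the include target stays
        // addressable; every other directive becomes a single
        // PreprocessorStatement token.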
        if (ch == '#') {
            begin_token();
            consume();

            if (is_valid_first_character_of_identifier(peek()))
                while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
                    consume();

            auto directive = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
            if (directive == "#include") {
                commit_token(Token::Type::IncludeStatement);

                begin_token();
                while (isspace(peek()))
                    consume();
                commit_token(Token::Type::Whitespace);

                begin_token();
                if (peek() == '<' || peek() == '"') {
                    char closing = consume() == '<' ? '>' : '"';
                    while (peek() && peek() != closing && peek() != '\n')
                        consume();

                    if (peek() && consume() == '\n') {
                        commit_token(Token::Type::IncludePath);
                        continue;
                    }

                    commit_token(Token::Type::IncludePath);
                    begin_token();
                }
            } else {
                while (peek() && peek() != '\n')
                    consume();

                commit_token(Token::Type::PreprocessorStatement);
            }

            continue;
        }
        if (ch == '/' && peek(1) == '/') {
            begin_token();
            while (peek() && peek() != '\n')
                consume();
            commit_token(Token::Type::Comment);
            continue;
        }
        if (ch == '/' && peek(1) == '*') {
            begin_token();
            consume();
            consume();
            bool comment_block_ends = false;
            while (peek()) {
                if (peek() == '*' && peek(1) == '/') {
                    comment_block_ends = true;
                    break;
                }

                consume();
            }

            if (comment_block_ends) {
                consume();
                consume();
            }

            commit_token(Token::Type::Comment);
            continue;
        }
        if (ch == '/') {
            emit_token_equals(Token::Type::Slash, Token::Type::SlashEquals);
            continue;
        }
        if (size_t prefix = match_string_prefix('"'); prefix > 0) {
            begin_token();
            for (size_t i = 0; i < prefix; ++i)
                consume();
            while (peek()) {
                if (peek() == '\\') {
                    if (size_t escape = match_escape_sequence(); escape > 0) {
                        commit_token(Token::Type::DoubleQuotedString);
                        begin_token();
                        for (size_t i = 0; i < escape; ++i)
                            consume();
                        commit_token(Token::Type::EscapeSequence);
                        begin_token();
                        continue;
                    }
                }

                if (consume() == '"')
                    break;
            }
            commit_token(Token::Type::DoubleQuotedString);
            continue;
        }
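        // Raw string literals: remember the d-char delimiter between the
        // opening quote and '(' and only accept a closing quote that is
        // preceded by ')' followed by that same delimiter.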
        if (size_t prefix = match_string_prefix('R'); prefix > 0 && peek(prefix) == '"') {
            begin_token();
            for (size_t i = 0; i < prefix + 1; ++i)
                consume();
            size_t prefix_start = m_index;
            while (peek() && peek() != '(')
                consume();
            StringView prefix_string = m_input.substring_view(prefix_start, m_index - prefix_start);
            while (peek()) {
                if (consume() == '"') {
                    ASSERT(m_index >= prefix_string.length() + 2);
                    ASSERT(m_input[m_index - 1] == '"');
                    if (m_input[m_index - 1 - prefix_string.length() - 1] == ')') {
                        StringView suffix_string = m_input.substring_view(m_index - 1 - prefix_string.length(), prefix_string.length());
                        if (prefix_string == suffix_string)
                            break;
                    }
                }
            }
            commit_token(Token::Type::RawString);
            continue;
        }
        if (size_t prefix = match_string_prefix('\''); prefix > 0) {
            begin_token();
            for (size_t i = 0; i < prefix; ++i)
                consume();
            while (peek()) {
                if (peek() == '\\') {
                    if (size_t escape = match_escape_sequence(); escape > 0) {
                        commit_token(Token::Type::SingleQuotedString);
                        begin_token();
                        for (size_t i = 0; i < escape; ++i)
                            consume();
                        commit_token(Token::Type::EscapeSequence);
                        begin_token();
                        continue;
                    }
                }

                if (consume() == '\'')
                    break;
            }
            commit_token(Token::Type::SingleQuotedString);
            continue;
        }
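        // Numeric literals: handles binary and hex prefixes, digit
        // separators ('), a decimal point and exponent (either of which
        // promotes the token to Float), and the u/U, f/F, l/L suffixes.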
        if (isdigit(ch) || (ch == '.' && isdigit(peek(1)))) {
            begin_token();
            consume();

            auto type = ch == '.' ? Token::Type::Float : Token::Type::Integer;
            bool is_hex = false;
            bool is_binary = false;

            auto match_exponent = [&]() -> size_t {
                char ch = peek();
                if (ch != 'e' && ch != 'E' && ch != 'p' && ch != 'P')
                    return 0;

                type = Token::Type::Float;
                size_t length = 1;
                ch = peek(length);
                if (ch == '+' || ch == '-') {
                    ++length;
                }
                for (ch = peek(length); isdigit(ch); ch = peek(length)) {
                    ++length;
                }
                return length;
            };

            auto match_type_literal = [&]() -> size_t {
                size_t length = 0;
                for (;;) {
                    char ch = peek(length);
                    if ((ch == 'u' || ch == 'U') && type == Token::Type::Integer) {
                        ++length;
                    } else if ((ch == 'f' || ch == 'F') && !is_binary) {
                        type = Token::Type::Float;
                        ++length;
                    } else if (ch == 'l' || ch == 'L') {
                        ++length;
                    } else
                        return length;
                }
            };

            if (peek() == 'b' || peek() == 'B') {
                consume();
                is_binary = true;
                for (char ch = peek(); ch == '0' || ch == '1' || (ch == '\'' && peek(1) != '\''); ch = peek()) {
                    consume();
                }
            } else {
                if (peek() == 'x' || peek() == 'X') {
                    consume();
                    is_hex = true;
                }

                for (char ch = peek(); (is_hex ? isxdigit(ch) : isdigit(ch)) || (ch == '\'' && peek(1) != '\'') || ch == '.'; ch = peek()) {
                    if (ch == '.') {
                        if (type == Token::Type::Integer) {
                            type = Token::Type::Float;
                        } else
                            break;
                    }
                    consume();
                }
            }

            if (!is_binary) {
                size_t length = match_exponent();
                for (size_t i = 0; i < length; ++i)
                    consume();
            }

            size_t length = match_type_literal();
            for (size_t i = 0; i < length; ++i)
                consume();

            commit_token(type);
            continue;
        }
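        // Identifiers are classified against the keyword and known-type
        // tables; anything else becomes a plain Identifier token.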
        if (is_valid_first_character_of_identifier(ch)) {
            begin_token();
            while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
                consume();
            auto token_view = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
            if (is_keyword(token_view))
                commit_token(Token::Type::Keyword);
            else if (is_known_type(token_view))
                commit_token(Token::Type::KnownType);
            else
                commit_token(Token::Type::Identifier);
            continue;
        }

        dbgln("Unimplemented token character: {}", ch);
        emit_token(Token::Type::Unknown);
    }
    return tokens;
}

bool Position::operator<(const Position& other) const
{
    return line < other.line || (line == other.line && column < other.column);
}

bool Position::operator>(const Position& other) const
{
    return !(*this < other) && !(*this == other);
}

bool Position::operator==(const Position& other) const
{
    return line == other.line && column == other.column;
}

}