mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 13:48:12 +00:00
LibCpp: Add library for working with c++ code
Moved the C++ Lexer we have from LibGUI to here, so that other components could use it without linking with LibGUI.
This commit is contained in:
parent
fec4152220
commit
bf53d7ff64
8 changed files with 132 additions and 126 deletions
789
Libraries/LibCpp/Lexer.cpp
Normal file
789
Libraries/LibCpp/Lexer.cpp
Normal file
|
@ -0,0 +1,789 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "Lexer.h"
|
||||
#include <AK/HashTable.h>
|
||||
#include <AK/StdLibExtras.h>
|
||||
#include <AK/String.h>
|
||||
#include <ctype.h>
|
||||
|
||||
namespace Cpp {
|
||||
|
||||
Lexer::Lexer(const StringView& input)
|
||||
: m_input(input)
|
||||
{
|
||||
}
|
||||
|
||||
char Lexer::peek(size_t offset) const
|
||||
{
|
||||
if ((m_index + offset) >= m_input.length())
|
||||
return 0;
|
||||
return m_input[m_index + offset];
|
||||
}
|
||||
|
||||
char Lexer::consume()
|
||||
{
|
||||
ASSERT(m_index < m_input.length());
|
||||
char ch = m_input[m_index++];
|
||||
m_previous_position = m_position;
|
||||
if (ch == '\n') {
|
||||
m_position.line++;
|
||||
m_position.column = 0;
|
||||
} else {
|
||||
m_position.column++;
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
static bool is_valid_first_character_of_identifier(char ch)
|
||||
{
|
||||
return isalpha(ch) || ch == '_' || ch == '$';
|
||||
}
|
||||
|
||||
static bool is_valid_nonfirst_character_of_identifier(char ch)
|
||||
{
|
||||
return is_valid_first_character_of_identifier(ch) || isdigit(ch);
|
||||
}
|
||||
|
||||
constexpr const char* s_known_keywords[] = {
|
||||
"alignas",
|
||||
"alignof",
|
||||
"and",
|
||||
"and_eq",
|
||||
"asm",
|
||||
"bitand",
|
||||
"bitor",
|
||||
"bool",
|
||||
"break",
|
||||
"case",
|
||||
"catch",
|
||||
"class",
|
||||
"compl",
|
||||
"const",
|
||||
"const_cast",
|
||||
"constexpr",
|
||||
"continue",
|
||||
"decltype",
|
||||
"default",
|
||||
"delete",
|
||||
"do",
|
||||
"dynamic_cast",
|
||||
"else",
|
||||
"enum",
|
||||
"explicit",
|
||||
"export",
|
||||
"extern",
|
||||
"false",
|
||||
"final",
|
||||
"for",
|
||||
"friend",
|
||||
"goto",
|
||||
"if",
|
||||
"inline",
|
||||
"mutable",
|
||||
"namespace",
|
||||
"new",
|
||||
"noexcept",
|
||||
"not",
|
||||
"not_eq",
|
||||
"nullptr",
|
||||
"operator",
|
||||
"or",
|
||||
"or_eq",
|
||||
"override",
|
||||
"private",
|
||||
"protected",
|
||||
"public",
|
||||
"register",
|
||||
"reinterpret_cast",
|
||||
"return",
|
||||
"signed",
|
||||
"sizeof",
|
||||
"static",
|
||||
"static_assert",
|
||||
"static_cast",
|
||||
"struct",
|
||||
"switch",
|
||||
"template",
|
||||
"this",
|
||||
"thread_local",
|
||||
"throw",
|
||||
"true",
|
||||
"try",
|
||||
"typedef",
|
||||
"typeid",
|
||||
"typename",
|
||||
"union",
|
||||
"using",
|
||||
"virtual",
|
||||
"volatile",
|
||||
"while",
|
||||
"xor",
|
||||
"xor_eq"
|
||||
};
|
||||
|
||||
constexpr const char* s_known_types[] = {
|
||||
"ByteBuffer",
|
||||
"CircularDeque",
|
||||
"CircularQueue",
|
||||
"Deque",
|
||||
"DoublyLinkedList",
|
||||
"FileSystemPath",
|
||||
"Array",
|
||||
"Function",
|
||||
"HashMap",
|
||||
"HashTable",
|
||||
"IPv4Address",
|
||||
"InlineLinkedList",
|
||||
"IntrusiveList",
|
||||
"JsonArray",
|
||||
"JsonObject",
|
||||
"JsonValue",
|
||||
"MappedFile",
|
||||
"NetworkOrdered",
|
||||
"NonnullOwnPtr",
|
||||
"NonnullOwnPtrVector",
|
||||
"NonnullRefPtr",
|
||||
"NonnullRefPtrVector",
|
||||
"Optional",
|
||||
"OwnPtr",
|
||||
"RefPtr",
|
||||
"Result",
|
||||
"ScopeGuard",
|
||||
"SinglyLinkedList",
|
||||
"String",
|
||||
"StringBuilder",
|
||||
"StringImpl",
|
||||
"StringView",
|
||||
"Utf8View",
|
||||
"Vector",
|
||||
"WeakPtr",
|
||||
"auto",
|
||||
"char",
|
||||
"char16_t",
|
||||
"char32_t",
|
||||
"char8_t",
|
||||
"double",
|
||||
"float",
|
||||
"i16",
|
||||
"i32",
|
||||
"i64",
|
||||
"i8",
|
||||
"int",
|
||||
"int",
|
||||
"long",
|
||||
"short",
|
||||
"signed",
|
||||
"u16",
|
||||
"u32",
|
||||
"u64",
|
||||
"u8",
|
||||
"unsigned",
|
||||
"void",
|
||||
"wchar_t"
|
||||
};
|
||||
|
||||
static bool is_keyword(const StringView& string)
|
||||
{
|
||||
static HashTable<String> keywords(array_size(s_known_keywords));
|
||||
if (keywords.is_empty()) {
|
||||
keywords.set_from(s_known_keywords);
|
||||
}
|
||||
return keywords.contains(string);
|
||||
}
|
||||
|
||||
static bool is_known_type(const StringView& string)
|
||||
{
|
||||
static HashTable<String> types(array_size(s_known_types));
|
||||
if (types.is_empty()) {
|
||||
types.set_from(s_known_types);
|
||||
}
|
||||
return types.contains(string);
|
||||
}
|
||||
|
||||
Vector<Token> Lexer::lex()
|
||||
{
|
||||
Vector<Token> tokens;
|
||||
|
||||
size_t token_start_index = 0;
|
||||
Position token_start_position;
|
||||
|
||||
auto emit_token = [&](auto type) {
|
||||
Token token;
|
||||
token.m_type = type;
|
||||
token.m_start = m_position;
|
||||
token.m_end = m_position;
|
||||
tokens.append(token);
|
||||
consume();
|
||||
};
|
||||
|
||||
auto begin_token = [&] {
|
||||
token_start_index = m_index;
|
||||
token_start_position = m_position;
|
||||
};
|
||||
auto commit_token = [&](auto type) {
|
||||
Token token;
|
||||
token.m_type = type;
|
||||
token.m_start = token_start_position;
|
||||
token.m_end = m_previous_position;
|
||||
tokens.append(token);
|
||||
};
|
||||
|
||||
auto emit_token_equals = [&](auto type, auto equals_type) {
|
||||
if (peek(1) == '=') {
|
||||
begin_token();
|
||||
consume();
|
||||
consume();
|
||||
commit_token(equals_type);
|
||||
return;
|
||||
}
|
||||
emit_token(type);
|
||||
};
|
||||
|
||||
auto match_escape_sequence = [&]() -> size_t {
|
||||
switch (peek(1)) {
|
||||
case '\'':
|
||||
case '"':
|
||||
case '?':
|
||||
case '\\':
|
||||
case 'a':
|
||||
case 'b':
|
||||
case 'f':
|
||||
case 'n':
|
||||
case 'r':
|
||||
case 't':
|
||||
case 'v':
|
||||
return 2;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7': {
|
||||
size_t octal_digits = 1;
|
||||
for (size_t i = 0; i < 2; ++i) {
|
||||
char next = peek(2 + i);
|
||||
if (next < '0' || next > '7')
|
||||
break;
|
||||
++octal_digits;
|
||||
}
|
||||
return 1 + octal_digits;
|
||||
}
|
||||
case 'x': {
|
||||
size_t hex_digits = 0;
|
||||
while (isxdigit(peek(2 + hex_digits)))
|
||||
++hex_digits;
|
||||
return 2 + hex_digits;
|
||||
}
|
||||
case 'u':
|
||||
case 'U': {
|
||||
bool is_unicode = true;
|
||||
size_t number_of_digits = peek(1) == 'u' ? 4 : 8;
|
||||
for (size_t i = 0; i < number_of_digits; ++i) {
|
||||
if (!isxdigit(peek(2 + i))) {
|
||||
is_unicode = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return is_unicode ? 2 + number_of_digits : 0;
|
||||
}
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
auto match_string_prefix = [&](char quote) -> size_t {
|
||||
if (peek() == quote)
|
||||
return 1;
|
||||
if (peek() == 'L' && peek(1) == quote)
|
||||
return 2;
|
||||
if (peek() == 'u') {
|
||||
if (peek(1) == quote)
|
||||
return 2;
|
||||
if (peek(1) == '8' && peek(2) == quote)
|
||||
return 3;
|
||||
}
|
||||
if (peek() == 'U' && peek(1) == quote)
|
||||
return 2;
|
||||
return 0;
|
||||
};
|
||||
|
||||
while (m_index < m_input.length()) {
|
||||
auto ch = peek();
|
||||
if (isspace(ch)) {
|
||||
begin_token();
|
||||
while (isspace(peek()))
|
||||
consume();
|
||||
commit_token(Token::Type::Whitespace);
|
||||
continue;
|
||||
}
|
||||
if (ch == '(') {
|
||||
emit_token(Token::Type::LeftParen);
|
||||
continue;
|
||||
}
|
||||
if (ch == ')') {
|
||||
emit_token(Token::Type::RightParen);
|
||||
continue;
|
||||
}
|
||||
if (ch == '{') {
|
||||
emit_token(Token::Type::LeftCurly);
|
||||
continue;
|
||||
}
|
||||
if (ch == '}') {
|
||||
emit_token(Token::Type::RightCurly);
|
||||
continue;
|
||||
}
|
||||
if (ch == '[') {
|
||||
emit_token(Token::Type::LeftBracket);
|
||||
continue;
|
||||
}
|
||||
if (ch == ']') {
|
||||
emit_token(Token::Type::RightBracket);
|
||||
continue;
|
||||
}
|
||||
if (ch == '<') {
|
||||
begin_token();
|
||||
consume();
|
||||
if (peek() == '<') {
|
||||
consume();
|
||||
if (peek() == '=') {
|
||||
consume();
|
||||
commit_token(Token::Type::LessLessEquals);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::LessLess);
|
||||
continue;
|
||||
}
|
||||
if (peek() == '=') {
|
||||
consume();
|
||||
commit_token(Token::Type::LessEquals);
|
||||
continue;
|
||||
}
|
||||
if (peek() == '>') {
|
||||
consume();
|
||||
commit_token(Token::Type::LessGreater);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::Less);
|
||||
continue;
|
||||
}
|
||||
if (ch == '>') {
|
||||
begin_token();
|
||||
consume();
|
||||
if (peek() == '>') {
|
||||
consume();
|
||||
if (peek() == '=') {
|
||||
consume();
|
||||
commit_token(Token::Type::GreaterGreaterEquals);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::GreaterGreater);
|
||||
continue;
|
||||
}
|
||||
if (peek() == '=') {
|
||||
consume();
|
||||
commit_token(Token::Type::GreaterEquals);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::Greater);
|
||||
continue;
|
||||
}
|
||||
if (ch == ',') {
|
||||
emit_token(Token::Type::Comma);
|
||||
continue;
|
||||
}
|
||||
if (ch == '+') {
|
||||
begin_token();
|
||||
consume();
|
||||
if (peek() == '+') {
|
||||
consume();
|
||||
commit_token(Token::Type::PlusPlus);
|
||||
continue;
|
||||
}
|
||||
if (peek() == '=') {
|
||||
consume();
|
||||
commit_token(Token::Type::PlusEquals);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::Plus);
|
||||
continue;
|
||||
}
|
||||
if (ch == '-') {
|
||||
begin_token();
|
||||
consume();
|
||||
if (peek() == '-') {
|
||||
consume();
|
||||
commit_token(Token::Type::MinusMinus);
|
||||
continue;
|
||||
}
|
||||
if (peek() == '=') {
|
||||
consume();
|
||||
commit_token(Token::Type::MinusEquals);
|
||||
continue;
|
||||
}
|
||||
if (peek() == '>') {
|
||||
consume();
|
||||
if (peek() == '*') {
|
||||
consume();
|
||||
commit_token(Token::Type::ArrowAsterisk);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::Arrow);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::Minus);
|
||||
continue;
|
||||
}
|
||||
if (ch == '*') {
|
||||
emit_token_equals(Token::Type::Asterisk, Token::Type::AsteriskEquals);
|
||||
continue;
|
||||
}
|
||||
if (ch == '%') {
|
||||
emit_token_equals(Token::Type::Percent, Token::Type::PercentEquals);
|
||||
continue;
|
||||
}
|
||||
if (ch == '^') {
|
||||
emit_token_equals(Token::Type::Caret, Token::Type::CaretEquals);
|
||||
continue;
|
||||
}
|
||||
if (ch == '!') {
|
||||
emit_token_equals(Token::Type::ExclamationMark, Token::Type::ExclamationMarkEquals);
|
||||
continue;
|
||||
}
|
||||
if (ch == '=') {
|
||||
emit_token_equals(Token::Type::Equals, Token::Type::EqualsEquals);
|
||||
continue;
|
||||
}
|
||||
if (ch == '&') {
|
||||
begin_token();
|
||||
consume();
|
||||
if (peek() == '&') {
|
||||
consume();
|
||||
commit_token(Token::Type::AndAnd);
|
||||
continue;
|
||||
}
|
||||
if (peek() == '=') {
|
||||
consume();
|
||||
commit_token(Token::Type::AndEquals);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::And);
|
||||
continue;
|
||||
}
|
||||
if (ch == '|') {
|
||||
begin_token();
|
||||
consume();
|
||||
if (peek() == '|') {
|
||||
consume();
|
||||
commit_token(Token::Type::PipePipe);
|
||||
continue;
|
||||
}
|
||||
if (peek() == '=') {
|
||||
consume();
|
||||
commit_token(Token::Type::PipeEquals);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::Pipe);
|
||||
continue;
|
||||
}
|
||||
if (ch == '~') {
|
||||
emit_token(Token::Type::Tilde);
|
||||
continue;
|
||||
}
|
||||
if (ch == '?') {
|
||||
emit_token(Token::Type::QuestionMark);
|
||||
continue;
|
||||
}
|
||||
if (ch == ':') {
|
||||
begin_token();
|
||||
consume();
|
||||
if (peek() == ':') {
|
||||
consume();
|
||||
if (peek() == '*') {
|
||||
consume();
|
||||
commit_token(Token::Type::ColonColonAsterisk);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::ColonColon);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::Colon);
|
||||
continue;
|
||||
}
|
||||
if (ch == ';') {
|
||||
emit_token(Token::Type::Semicolon);
|
||||
continue;
|
||||
}
|
||||
if (ch == '.') {
|
||||
begin_token();
|
||||
consume();
|
||||
if (peek() == '*') {
|
||||
consume();
|
||||
commit_token(Token::Type::DotAsterisk);
|
||||
continue;
|
||||
}
|
||||
commit_token(Token::Type::Dot);
|
||||
continue;
|
||||
}
|
||||
if (ch == '#') {
|
||||
begin_token();
|
||||
consume();
|
||||
|
||||
if (is_valid_first_character_of_identifier(peek()))
|
||||
while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
|
||||
consume();
|
||||
|
||||
auto directive = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
|
||||
if (directive == "#include") {
|
||||
commit_token(Token::Type::IncludeStatement);
|
||||
|
||||
begin_token();
|
||||
while (isspace(peek()))
|
||||
consume();
|
||||
commit_token(Token::Type::Whitespace);
|
||||
|
||||
begin_token();
|
||||
if (peek() == '<' || peek() == '"') {
|
||||
char closing = consume() == '<' ? '>' : '"';
|
||||
while (peek() && peek() != closing && peek() != '\n')
|
||||
consume();
|
||||
|
||||
if (peek() && consume() == '\n') {
|
||||
commit_token(Token::Type::IncludePath);
|
||||
continue;
|
||||
}
|
||||
|
||||
commit_token(Token::Type::IncludePath);
|
||||
begin_token();
|
||||
}
|
||||
}
|
||||
|
||||
while (peek() && peek() != '\n')
|
||||
consume();
|
||||
|
||||
commit_token(Token::Type::PreprocessorStatement);
|
||||
continue;
|
||||
}
|
||||
if (ch == '/' && peek(1) == '/') {
|
||||
begin_token();
|
||||
while (peek() && peek() != '\n')
|
||||
consume();
|
||||
commit_token(Token::Type::Comment);
|
||||
continue;
|
||||
}
|
||||
if (ch == '/' && peek(1) == '*') {
|
||||
begin_token();
|
||||
consume();
|
||||
consume();
|
||||
bool comment_block_ends = false;
|
||||
while (peek()) {
|
||||
if (peek() == '*' && peek(1) == '/') {
|
||||
comment_block_ends = true;
|
||||
break;
|
||||
}
|
||||
|
||||
consume();
|
||||
}
|
||||
|
||||
if (comment_block_ends) {
|
||||
consume();
|
||||
consume();
|
||||
}
|
||||
|
||||
commit_token(Token::Type::Comment);
|
||||
continue;
|
||||
}
|
||||
if (ch == '/') {
|
||||
emit_token_equals(Token::Type::Slash, Token::Type::SlashEquals);
|
||||
continue;
|
||||
}
|
||||
if (size_t prefix = match_string_prefix('"'); prefix > 0) {
|
||||
begin_token();
|
||||
for (size_t i = 0; i < prefix; ++i)
|
||||
consume();
|
||||
while (peek()) {
|
||||
if (peek() == '\\') {
|
||||
if (size_t escape = match_escape_sequence(); escape > 0) {
|
||||
commit_token(Token::Type::DoubleQuotedString);
|
||||
begin_token();
|
||||
for (size_t i = 0; i < escape; ++i)
|
||||
consume();
|
||||
commit_token(Token::Type::EscapeSequence);
|
||||
begin_token();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (consume() == '"')
|
||||
break;
|
||||
}
|
||||
commit_token(Token::Type::DoubleQuotedString);
|
||||
continue;
|
||||
}
|
||||
if (size_t prefix = match_string_prefix('R'); prefix > 0 && peek(prefix) == '"') {
|
||||
begin_token();
|
||||
for (size_t i = 0; i < prefix + 1; ++i)
|
||||
consume();
|
||||
size_t prefix_start = m_index;
|
||||
while (peek() && peek() != '(')
|
||||
consume();
|
||||
StringView prefix_string = m_input.substring_view(prefix_start, m_index - prefix_start);
|
||||
while (peek()) {
|
||||
if (consume() == '"') {
|
||||
ASSERT(m_index >= prefix_string.length() + 2);
|
||||
ASSERT(m_input[m_index - 1] == '"');
|
||||
if (m_input[m_index - 1 - prefix_string.length() - 1] == ')') {
|
||||
StringView suffix_string = m_input.substring_view(m_index - 1 - prefix_string.length(), prefix_string.length());
|
||||
if (prefix_string == suffix_string)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
commit_token(Token::Type::RawString);
|
||||
continue;
|
||||
}
|
||||
if (size_t prefix = match_string_prefix('\''); prefix > 0) {
|
||||
begin_token();
|
||||
for (size_t i = 0; i < prefix; ++i)
|
||||
consume();
|
||||
while (peek()) {
|
||||
if (peek() == '\\') {
|
||||
if (size_t escape = match_escape_sequence(); escape > 0) {
|
||||
commit_token(Token::Type::SingleQuotedString);
|
||||
begin_token();
|
||||
for (size_t i = 0; i < escape; ++i)
|
||||
consume();
|
||||
commit_token(Token::Type::EscapeSequence);
|
||||
begin_token();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (consume() == '\'')
|
||||
break;
|
||||
}
|
||||
commit_token(Token::Type::SingleQuotedString);
|
||||
continue;
|
||||
}
|
||||
if (isdigit(ch) || (ch == '.' && isdigit(peek(1)))) {
|
||||
begin_token();
|
||||
consume();
|
||||
|
||||
auto type = ch == '.' ? Token::Type::Float : Token::Type::Integer;
|
||||
bool is_hex = false;
|
||||
bool is_binary = false;
|
||||
|
||||
auto match_exponent = [&]() -> size_t {
|
||||
char ch = peek();
|
||||
if (ch != 'e' && ch != 'E' && ch != 'p' && ch != 'P')
|
||||
return 0;
|
||||
|
||||
type = Token::Type::Float;
|
||||
size_t length = 1;
|
||||
ch = peek(length);
|
||||
if (ch == '+' || ch == '-') {
|
||||
++length;
|
||||
}
|
||||
for (ch = peek(length); isdigit(ch); ch = peek(length)) {
|
||||
++length;
|
||||
}
|
||||
return length;
|
||||
};
|
||||
|
||||
auto match_type_literal = [&]() -> size_t {
|
||||
size_t length = 0;
|
||||
for (;;) {
|
||||
char ch = peek(length);
|
||||
if ((ch == 'u' || ch == 'U') && type == Token::Type::Integer) {
|
||||
++length;
|
||||
} else if ((ch == 'f' || ch == 'F') && !is_binary) {
|
||||
type = Token::Type::Float;
|
||||
++length;
|
||||
} else if (ch == 'l' || ch == 'L') {
|
||||
++length;
|
||||
} else
|
||||
return length;
|
||||
}
|
||||
};
|
||||
|
||||
if (peek() == 'b' || peek() == 'B') {
|
||||
consume();
|
||||
is_binary = true;
|
||||
for (char ch = peek(); ch == '0' || ch == '1' || (ch == '\'' && peek(1) != '\''); ch = peek()) {
|
||||
consume();
|
||||
}
|
||||
} else {
|
||||
if (peek() == 'x' || peek() == 'X') {
|
||||
consume();
|
||||
is_hex = true;
|
||||
}
|
||||
|
||||
for (char ch = peek(); (is_hex ? isxdigit(ch) : isdigit(ch)) || (ch == '\'' && peek(1) != '\'') || ch == '.'; ch = peek()) {
|
||||
if (ch == '.') {
|
||||
if (type == Token::Type::Integer) {
|
||||
type = Token::Type::Float;
|
||||
} else
|
||||
break;
|
||||
};
|
||||
consume();
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_binary) {
|
||||
size_t length = match_exponent();
|
||||
for (size_t i = 0; i < length; ++i)
|
||||
consume();
|
||||
}
|
||||
|
||||
size_t length = match_type_literal();
|
||||
for (size_t i = 0; i < length; ++i)
|
||||
consume();
|
||||
|
||||
commit_token(type);
|
||||
continue;
|
||||
}
|
||||
if (is_valid_first_character_of_identifier(ch)) {
|
||||
begin_token();
|
||||
while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
|
||||
consume();
|
||||
auto token_view = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
|
||||
if (is_keyword(token_view))
|
||||
commit_token(Token::Type::Keyword);
|
||||
else if (is_known_type(token_view))
|
||||
commit_token(Token::Type::KnownType);
|
||||
else
|
||||
commit_token(Token::Type::Identifier);
|
||||
continue;
|
||||
}
|
||||
dbg() << "Unimplemented token character: " << ch;
|
||||
emit_token(Token::Type::Unknown);
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue