1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-10-25 01:42:06 +00:00
serenity/Userland/Libraries/LibCpp/Parser.cpp
Itamar 4f1c77a059 LibCpp: Fix end position calculation for various AST node types
Previously, the end position of the Type and Name nodes was incorrect.
It was incorrectly set to the start position of the next unconsumed
token.

This commit adds a previous_token_end() method and uses it to correctly
get the end position for these node types.
2022-02-09 00:51:31 +01:00

1707 lines
50 KiB
C++

/*
* Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "Parser.h"
#include "AST.h"
#include <AK/Debug.h>
#include <AK/ScopeGuard.h>
#include <AK/ScopeLogger.h>
#include <LibCpp/Lexer.h>
#define LOG_SCOPE() ScopeLogger<CPP_DEBUG> logger(String::formatted("'{}' - {} ({})", peek().text(), peek().type_as_string(), m_state.token_index))
namespace Cpp {
Parser::Parser(Vector<Token> tokens, String const& filename)
: m_filename(filename)
, m_tokens(move(tokens))
{
if constexpr (CPP_DEBUG) {
dbgln("Tokens:");
for (size_t i = 0; i < m_tokens.size(); ++i) {
dbgln("{}- {}", i, m_tokens[i].to_string());
}
}
}
NonnullRefPtr<TranslationUnit> Parser::parse()
{
LOG_SCOPE();
if (m_tokens.is_empty())
return create_root_ast_node({}, {});
auto unit = create_root_ast_node(m_tokens.first().start(), m_tokens.last().end());
unit->set_declarations(parse_declarations_in_translation_unit(*unit));
return unit;
}
NonnullRefPtrVector<Declaration> Parser::parse_declarations_in_translation_unit(ASTNode& parent)
{
NonnullRefPtrVector<Declaration> declarations;
while (!eof()) {
auto declaration = parse_single_declaration_in_translation_unit(parent);
if (declaration) {
declarations.append(declaration.release_nonnull());
} else {
error("unexpected token");
consume();
}
}
return declarations;
}
RefPtr<Declaration> Parser::parse_single_declaration_in_translation_unit(ASTNode& parent)
{
while (!eof()) {
if (match_comment()) {
consume(Token::Type::Comment);
continue;
}
if (match_preprocessor()) {
consume_preprocessor();
continue;
}
auto declaration = match_declaration_in_translation_unit();
if (declaration.has_value()) {
return parse_declaration(parent, declaration.value());
}
return {};
}
return {};
}
NonnullRefPtr<Declaration> Parser::parse_declaration(ASTNode& parent, DeclarationType declaration_type)
{
switch (declaration_type) {
case DeclarationType::Function:
return parse_function_declaration(parent);
case DeclarationType::Variable:
return parse_variable_declaration(parent);
case DeclarationType::Enum:
return parse_enum_declaration(parent);
case DeclarationType::Class:
return parse_class_declaration(parent);
case DeclarationType::Namespace:
return parse_namespace_declaration(parent);
case DeclarationType::Constructor:
return parse_constructor(parent);
case DeclarationType::Destructor:
return parse_destructor(parent);
default:
error("unexpected declaration type");
return create_ast_node<InvalidDeclaration>(parent, position(), position());
}
}
NonnullRefPtr<FunctionDeclaration> Parser::parse_function_declaration(ASTNode& parent)
{
auto func = create_ast_node<FunctionDeclaration>(parent, position(), {});
func->set_qualifiers(parse_function_qualifiers());
func->set_return_type(parse_type(*func));
auto function_name = consume(Token::Type::Identifier);
func->set_name(text_of_token(function_name));
consume(Token::Type::LeftParen);
auto parameters = parse_parameter_list(*func);
if (parameters.has_value())
func->set_parameters(parameters.value());
consume(Token::Type::RightParen);
while (match_keyword("const") || match_keyword("override")) {
consume();
// FIXME: Note that this function is supposed to be a class member, and `this` has to be const, somehow.
}
RefPtr<FunctionDefinition> body;
Position func_end {};
if (peek(Token::Type::LeftCurly).has_value()) {
body = parse_function_definition(*func);
func_end = body->end();
} else {
func_end = position();
if (match_attribute_specification())
consume_attribute_specification(); // we don't use the value of __attribute__
consume(Token::Type::Semicolon);
}
func->set_definition(move(body));
func->set_end(func_end);
return func;
}
NonnullRefPtr<FunctionDefinition> Parser::parse_function_definition(ASTNode& parent)
{
LOG_SCOPE();
auto func = create_ast_node<FunctionDefinition>(parent, position(), {});
consume(Token::Type::LeftCurly);
while (!eof() && peek().type() != Token::Type::RightCurly) {
func->add_statement(parse_statement(func));
}
func->set_end(position());
if (!eof())
consume(Token::Type::RightCurly);
return func;
}
NonnullRefPtr<Statement> Parser::parse_statement(ASTNode& parent)
{
LOG_SCOPE();
ArmedScopeGuard consume_semicolon([this]() {
consume(Token::Type::Semicolon);
});
if (match_block_statement()) {
consume_semicolon.disarm();
return parse_block_statement(parent);
}
if (match_comment()) {
consume_semicolon.disarm();
return parse_comment(parent);
}
if (match_variable_declaration()) {
return parse_variable_declaration(parent, false);
}
if (match_expression()) {
return parse_expression(parent);
}
if (match_keyword("return")) {
return parse_return_statement(parent);
}
if (match_keyword("for")) {
consume_semicolon.disarm();
return parse_for_statement(parent);
}
if (match_keyword("if")) {
consume_semicolon.disarm();
return parse_if_statement(parent);
} else {
error("unexpected statement type");
consume_semicolon.disarm();
consume();
return create_ast_node<InvalidStatement>(parent, position(), position());
}
}
NonnullRefPtr<Comment> Parser::parse_comment(ASTNode& parent)
{
auto comment = create_ast_node<Comment>(parent, position(), {});
consume(Token::Type::Comment);
comment->set_end(position());
return comment;
}
bool Parser::match_block_statement()
{
return peek().type() == Token::Type::LeftCurly;
}
NonnullRefPtr<BlockStatement> Parser::parse_block_statement(ASTNode& parent)
{
LOG_SCOPE();
auto block_statement = create_ast_node<BlockStatement>(parent, position(), {});
consume(Token::Type::LeftCurly);
while (!eof() && peek().type() != Token::Type::RightCurly) {
block_statement->add_statement(parse_statement(*block_statement));
}
consume(Token::Type::RightCurly);
block_statement->set_end(position());
return block_statement;
}
bool Parser::match_type()
{
return match_named_type();
}
bool Parser::match_named_type()
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
parse_type_qualifiers();
if (match_keyword("auto")) {
return true;
}
if (match_keyword("struct")) {
consume(Token::Type::Keyword); // Consume struct prefix
}
if (!match_name())
return false;
return true;
}
bool Parser::match_template_arguments()
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
if (!peek(Token::Type::Less).has_value())
return false;
consume();
while (!eof() && peek().type() != Token::Type::Greater) {
if (!match_named_type())
return false;
(void)parse_type(get_dummy_node());
}
return peek().type() == Token::Type::Greater;
}
NonnullRefPtrVector<Type> Parser::parse_template_arguments(ASTNode& parent)
{
LOG_SCOPE();
consume(Token::Type::Less);
NonnullRefPtrVector<Type> template_arguments;
while (!eof() && peek().type() != Token::Type::Greater) {
template_arguments.append(parse_type(parent));
}
consume(Token::Type::Greater);
return template_arguments;
}
bool Parser::match_variable_declaration()
{
LOG_SCOPE();
save_state();
ScopeGuard state_guard = [this] { load_state(); };
if (!match_type()) {
return false;
}
VERIFY(m_root_node);
(void)parse_type(get_dummy_node());
// Identifier
if (!match_name())
return false;
(void)parse_name(get_dummy_node());
if (match(Token::Type::Equals)) {
consume(Token::Type::Equals);
if (!match_expression()) {
error("initial value of variable is not an expression");
return false;
}
return true;
}
if (match_braced_init_list())
(void)parse_braced_init_list(get_dummy_node());
return match(Token::Type::Semicolon);
}
NonnullRefPtr<VariableDeclaration> Parser::parse_variable_declaration(ASTNode& parent, bool expect_semicolon)
{
LOG_SCOPE();
auto var = create_ast_node<VariableDeclaration>(parent, position(), {});
if (!match_variable_declaration()) {
error("unexpected token for variable type");
var->set_end(position());
return var;
}
var->set_type(parse_type(var));
auto identifier_token = consume(Token::Type::Identifier);
RefPtr<Expression> initial_value;
if (match(Token::Type::Equals)) {
consume(Token::Type::Equals);
initial_value = parse_expression(var);
}
if (match_braced_init_list()) {
initial_value = parse_braced_init_list(var);
}
if (expect_semicolon)
consume(Token::Type::Semicolon);
var->set_end(position());
var->set_name(text_of_token(identifier_token));
var->set_initial_value(move(initial_value));
return var;
}
NonnullRefPtr<Expression> Parser::parse_expression(ASTNode& parent)
{
LOG_SCOPE();
auto expression = parse_primary_expression(parent);
// TODO: remove eof() logic, should still work without it
if (eof() || match(Token::Type::Semicolon)) {
return expression;
}
NonnullRefPtrVector<Expression> secondary_expressions;
while (match_secondary_expression()) {
// FIXME: Handle operator precedence
expression = parse_secondary_expression(parent, expression);
secondary_expressions.append(expression);
}
for (size_t i = 0; secondary_expressions.size() != 0 && i < secondary_expressions.size() - 1; ++i) {
secondary_expressions[i].set_parent(secondary_expressions[i + 1]);
}
return expression;
}
bool Parser::match_secondary_expression()
{
auto type = peek().type();
return type == Token::Type::Plus
|| type == Token::Type::PlusEquals
|| type == Token::Type::Minus
|| type == Token::Type::MinusEquals
|| type == Token::Type::Asterisk
|| type == Token::Type::AsteriskEquals
|| type == Token::Type::Percent
|| type == Token::Type::PercentEquals
|| type == Token::Type::Equals
|| type == Token::Type::Greater
|| type == Token::Type::GreaterEquals
|| type == Token::Type::Less
|| type == Token::Type::LessEquals
|| type == Token::Type::Dot
|| type == Token::Type::PlusPlus
|| type == Token::Type::MinusMinus
|| type == Token::Type::And
|| type == Token::Type::AndEquals
|| type == Token::Type::Pipe
|| type == Token::Type::PipeEquals
|| type == Token::Type::Caret
|| type == Token::Type::CaretEquals
|| type == Token::Type::LessLess
|| type == Token::Type::LessLessEquals
|| type == Token::Type::GreaterGreater
|| type == Token::Type::GreaterGreaterEquals
|| type == Token::Type::EqualsEquals
|| type == Token::Type::AndAnd
|| type == Token::Type::PipePipe
|| type == Token::Type::ExclamationMarkEquals
|| type == Token::Type::Arrow
|| type == Token::Type::LeftParen;
}
NonnullRefPtr<Expression> Parser::parse_primary_expression(ASTNode& parent)
{
LOG_SCOPE();
// TODO: remove eof() logic, should still work without it
if (eof()) {
auto node = create_ast_node<Identifier>(parent, position(), position());
return node;
}
if (match_unary_expression())
return parse_unary_expression(parent);
if (match_literal()) {
return parse_literal(parent);
}
if (match_cpp_cast_expression())
return parse_cpp_cast_expression(parent);
if (match_c_style_cast_expression())
return parse_c_style_cast_expression(parent);
if (match_sizeof_expression())
return parse_sizeof_expression(parent);
if (match_braced_init_list())
return parse_braced_init_list(parent);
if (match_name()) {
return parse_name(parent);
}
error("could not parse primary expression");
auto token = consume();
return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
}
bool Parser::match_literal()
{
switch (peek().type()) {
case Token::Type::Integer:
return true;
case Token::Type::SingleQuotedString:
return true;
case Token::Type::DoubleQuotedString:
return true;
case Token::Type::Float:
return true;
case Token::Type::Keyword: {
return match_boolean_literal() || peek().text() == "nullptr";
}
default:
return false;
}
}
bool Parser::match_unary_expression()
{
auto type = peek().type();
return type == Token::Type::PlusPlus
|| type == Token::Type::MinusMinus
|| type == Token::Type::ExclamationMark
|| type == Token::Type::Tilde
|| type == Token::Type::Plus
|| type == Token::Type::Minus
|| type == Token::Type::And;
}
NonnullRefPtr<UnaryExpression> Parser::parse_unary_expression(ASTNode& parent)
{
auto unary_exp = create_ast_node<UnaryExpression>(parent, position(), {});
auto op_token = consume();
UnaryOp op { UnaryOp::Invalid };
switch (op_token.type()) {
case Token::Type::Minus:
op = UnaryOp::Minus;
break;
case Token::Type::Plus:
op = UnaryOp::Plus;
break;
case Token::Type::ExclamationMark:
op = UnaryOp::Not;
break;
case Token::Type::Tilde:
op = UnaryOp::BitwiseNot;
break;
case Token::Type::PlusPlus:
op = UnaryOp::PlusPlus;
break;
case Token::Type::And:
op = UnaryOp::Address;
break;
default:
break;
}
unary_exp->set_op(op);
auto lhs = parse_expression(*unary_exp);
unary_exp->set_lhs(lhs);
unary_exp->set_end(lhs->end());
return unary_exp;
}
NonnullRefPtr<Expression> Parser::parse_literal(ASTNode& parent)
{
switch (peek().type()) {
case Token::Type::Integer: {
auto token = consume();
return create_ast_node<NumericLiteral>(parent, token.start(), token.end(), text_of_token(token));
}
case Token::Type::SingleQuotedString:
[[fallthrough]];
case Token::Type::DoubleQuotedString:
return parse_string_literal(parent);
case Token::Type::Keyword: {
if (match_boolean_literal())
return parse_boolean_literal(parent);
if (peek().text() == "nullptr") {
auto token = consume();
return create_ast_node<NullPointerLiteral>(parent, token.start(), token.end());
}
[[fallthrough]];
}
default: {
error("could not parse literal");
auto token = consume();
return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
}
}
}
NonnullRefPtr<Expression> Parser::parse_secondary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs)
{
LOG_SCOPE();
switch (peek().type()) {
case Token::Type::Plus:
return parse_binary_expression(parent, lhs, BinaryOp::Addition);
case Token::Type::Less:
return parse_binary_expression(parent, lhs, BinaryOp::LessThan);
case Token::Type::EqualsEquals:
return parse_binary_expression(parent, lhs, BinaryOp::EqualsEquals);
case Token::Type::ExclamationMarkEquals:
return parse_binary_expression(parent, lhs, BinaryOp::NotEqual);
case Token::Type::And:
return parse_binary_expression(parent, lhs, BinaryOp::BitwiseAnd);
case Token::Type::AndAnd:
return parse_binary_expression(parent, lhs, BinaryOp::LogicalAnd);
case Token::Type::Pipe:
return parse_binary_expression(parent, lhs, BinaryOp::BitwiseOr);
case Token::Type::PipePipe:
return parse_binary_expression(parent, lhs, BinaryOp::LogicalOr);
case Token::Type::Arrow:
return parse_binary_expression(parent, lhs, BinaryOp::Arrow);
case Token::Type::Equals:
return parse_assignment_expression(parent, lhs, AssignmentOp::Assignment);
case Token::Type::Dot: {
consume();
auto exp = create_ast_node<MemberExpression>(parent, lhs->start(), {});
lhs->set_parent(*exp);
exp->set_object(move(lhs));
auto identifier_token = consume(Token::Type::Identifier);
exp->set_property(create_ast_node<Identifier>(*exp, identifier_token.start(), identifier_token.end(), identifier_token.text()));
exp->set_end(position());
return exp;
}
case Token::Type::LeftParen: {
consume();
auto func = create_ast_node<FunctionCall>(parent, lhs->start(), {});
lhs->set_parent(*func);
func->set_callee(move(lhs));
while (peek().type() != Token::Type::RightParen && !eof()) {
func->add_argument(parse_expression(*func));
if (peek().type() == Token::Type::Comma)
consume(Token::Type::Comma);
}
consume(Token::Type::RightParen);
func->set_end(position());
return func;
}
default: {
error(String::formatted("unexpected operator for expression. operator: {}", peek().to_string()));
auto token = consume();
return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
}
}
}
NonnullRefPtr<BinaryExpression> Parser::parse_binary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, BinaryOp op)
{
consume(); // Operator
auto exp = create_ast_node<BinaryExpression>(parent, lhs->start(), {});
lhs->set_parent(*exp);
exp->set_op(op);
exp->set_lhs(move(lhs));
auto rhs = parse_expression(exp);
exp->set_end(rhs->end());
exp->set_rhs(move(rhs));
return exp;
}
NonnullRefPtr<AssignmentExpression> Parser::parse_assignment_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, AssignmentOp op)
{
consume(); // Operator
auto exp = create_ast_node<AssignmentExpression>(parent, lhs->start(), {});
lhs->set_parent(*exp);
exp->set_op(op);
exp->set_lhs(move(lhs));
auto rhs = parse_expression(exp);
exp->set_end(rhs->end());
exp->set_rhs(move(rhs));
return exp;
}
Optional<Parser::DeclarationType> Parser::match_declaration_in_translation_unit()
{
if (match_function_declaration())
return DeclarationType::Function;
if (match_enum_declaration())
return DeclarationType::Enum;
if (match_class_declaration())
return DeclarationType::Class;
if (match_namespace_declaration())
return DeclarationType::Namespace;
if (match_variable_declaration())
return DeclarationType::Variable;
return {};
}
Optional<Parser::DeclarationType> Parser::match_class_member(StringView class_name)
{
if (match_function_declaration())
return DeclarationType::Function;
if (match_enum_declaration())
return DeclarationType::Enum;
if (match_class_declaration())
return DeclarationType::Class;
if (match_variable_declaration())
return DeclarationType::Variable;
if (match_constructor(class_name))
return DeclarationType::Constructor;
if (match_destructor(class_name))
return DeclarationType::Destructor;
return {};
}
bool Parser::match_enum_declaration()
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
if (!match_keyword("enum"))
return false;
consume(Token::Type::Keyword);
if (match_keyword("class"))
consume(Token::Type::Keyword);
if (!match(Token::Type::Identifier))
return false;
consume(Token::Type::Identifier);
return match(Token::Type::LeftCurly);
}
bool Parser::match_class_declaration()
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
if (!match_keyword("struct") && !match_keyword("class"))
return false;
consume(Token::Type::Keyword);
if (!match(Token::Type::Identifier))
return false;
consume(Token::Type::Identifier);
auto has_final = match_keyword("final");
if (peek(has_final ? 1 : 0).type() == Token::Type::Colon) {
if (has_final)
consume();
do {
consume();
while (match_keyword("private") || match_keyword("public") || match_keyword("protected") || match_keyword("virtual"))
consume();
if (!match_name())
return false;
(void)parse_name(get_dummy_node());
} while (peek().type() == Token::Type::Comma);
}
return match(Token::Type::LeftCurly);
}
bool Parser::match_namespace_declaration()
{
return match_keyword("namespace");
}
bool Parser::match_function_declaration()
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
parse_function_qualifiers();
if (!match_type())
return false;
VERIFY(m_root_node);
(void)parse_type(get_dummy_node());
if (!peek(Token::Type::Identifier).has_value())
return false;
consume();
if (!peek(Token::Type::LeftParen).has_value())
return false;
consume();
while (consume().type() != Token::Type::RightParen && !eof()) { };
while (match_keyword("const") || match_keyword("override"))
consume();
if (peek(Token::Type::Semicolon).has_value() || peek(Token::Type::LeftCurly).has_value())
return true;
if (match_attribute_specification()) {
consume_attribute_specification();
return peek(Token::Type::Semicolon).has_value();
}
return false;
}
Optional<NonnullRefPtrVector<Parameter>> Parser::parse_parameter_list(ASTNode& parent)
{
LOG_SCOPE();
NonnullRefPtrVector<Parameter> parameters;
while (peek().type() != Token::Type::RightParen && !eof()) {
if (match_ellipsis()) {
auto param = create_ast_node<Parameter>(parent, position(), {}, StringView {});
consume(Token::Type::Dot);
consume(Token::Type::Dot);
auto last_dot = consume(Token::Type::Dot);
param->set_ellipsis(true);
param->set_end(last_dot.end());
parameters.append(move(param));
} else {
auto type = parse_type(parent);
auto name_identifier = peek(Token::Type::Identifier);
if (name_identifier.has_value())
consume(Token::Type::Identifier);
StringView name;
if (name_identifier.has_value())
name = text_of_token(name_identifier.value());
auto param = create_ast_node<Parameter>(parent, type->start(), name_identifier.has_value() ? name_identifier.value().end() : type->end(), name);
type->set_parent(*param.ptr());
param->set_type(move(type));
parameters.append(move(param));
}
if (peek(Token::Type::Comma).has_value())
consume(Token::Type::Comma);
}
return parameters;
}
bool Parser::match_comment()
{
return match(Token::Type::Comment);
}
bool Parser::match_whitespace()
{
return match(Token::Type::Whitespace);
}
bool Parser::match_preprocessor()
{
return match(Token::Type::PreprocessorStatement) || match(Token::Type::IncludeStatement);
}
void Parser::consume_preprocessor()
{
LOG_SCOPE();
switch (peek().type()) {
case Token::Type::PreprocessorStatement:
consume();
break;
case Token::Type::IncludeStatement:
consume();
consume(Token::Type::IncludePath);
break;
default:
error("unexpected token while parsing preprocessor statement");
consume();
}
}
Optional<Token> Parser::consume_whitespace()
{
LOG_SCOPE();
return consume(Token::Type::Whitespace);
}
Token Parser::consume(Token::Type type)
{
auto token = consume();
if (token.type() != type)
error(String::formatted("expected {} at {}:{}, found: {}", Token::type_to_string(type), token.start().line, token.start().column, Token::type_to_string(token.type())));
return token;
}
bool Parser::match(Token::Type type)
{
return peek().type() == type;
}
Token Parser::consume()
{
if (eof()) {
error("C++ Parser: out of tokens");
return { Token::Type::EOF_TOKEN, position(), position(), {} };
}
return m_tokens[m_state.token_index++];
}
Token Parser::peek(size_t offset) const
{
if (m_state.token_index + offset >= m_tokens.size())
return { Token::Type::EOF_TOKEN, position(), position(), {} };
return m_tokens[m_state.token_index + offset];
}
Optional<Token> Parser::peek(Token::Type type) const
{
auto token = peek();
if (token.type() == type)
return token;
return {};
}
void Parser::save_state()
{
m_saved_states.append(m_state);
m_state.state_nodes.clear();
}
void Parser::load_state()
{
m_state = m_saved_states.take_last();
}
StringView Parser::text_of_token(const Cpp::Token& token) const
{
return token.text();
}
String Parser::text_of_node(const ASTNode& node) const
{
return text_in_range(node.start(), node.end());
}
String Parser::text_in_range(Position start, Position end) const
{
StringBuilder builder;
for (auto token : tokens_in_range(start, end)) {
builder.append(token.text());
}
return builder.to_string();
}
Vector<Token> Parser::tokens_in_range(Position start, Position end) const
{
auto start_token_index = index_of_token_at(start);
auto end_node_index = index_of_token_at(end);
VERIFY(start_token_index.has_value());
VERIFY(end_node_index.has_value());
Vector<Token> tokens;
for (size_t i = start_token_index.value(); i <= end_node_index.value(); ++i) {
tokens.append(m_tokens[i]);
}
return tokens;
}
void Parser::error(StringView message)
{
LOG_SCOPE();
if (!m_saved_states.is_empty())
return;
if (message.is_null() || message.is_empty())
message = "<empty>";
String formatted_message;
if (m_state.token_index >= m_tokens.size()) {
formatted_message = String::formatted("C++ Parsed error on EOF.{}", message);
} else {
formatted_message = String::formatted("C++ Parser error: {}. token: {} ({}:{})",
message,
m_state.token_index < m_tokens.size() ? text_of_token(m_tokens[m_state.token_index]) : "EOF",
m_tokens[m_state.token_index].start().line,
m_tokens[m_state.token_index].start().column);
}
m_errors.append(formatted_message);
}
bool Parser::match_expression()
{
return match_literal()
|| match_name()
|| match_unary_expression()
|| match_cpp_cast_expression()
|| match_c_style_cast_expression()
|| match_sizeof_expression()
|| match_braced_init_list();
}
bool Parser::eof() const
{
return m_state.token_index >= m_tokens.size();
}
Position Parser::position() const
{
if (m_tokens.is_empty())
return {};
if (eof())
return m_tokens.last().end();
return peek().start();
}
Position Parser::previous_token_end() const
{
if (m_state.token_index < 1)
return {};
return m_tokens[m_state.token_index - 1].end();
}
RefPtr<ASTNode> Parser::node_at(Position pos) const
{
VERIFY(m_saved_states.is_empty());
auto index = index_of_node_at(pos);
if (!index.has_value())
return nullptr;
return m_nodes[index.value()];
}
Optional<size_t> Parser::index_of_node_at(Position pos) const
{
VERIFY(!m_tokens.is_empty());
VERIFY(m_saved_states.is_empty());
Optional<size_t> match_node_index;
auto node_span = [](const ASTNode& node) {
VERIFY(node.end().line >= node.start().line);
VERIFY((node.end().line > node.start().line) || (node.end().column >= node.start().column));
return Position { node.end().line - node.start().line, node.start().line != node.end().line ? 0 : node.end().column - node.start().column };
};
for (size_t node_index = 0; node_index < m_nodes.size(); ++node_index) {
auto& node = m_nodes[node_index];
if (node.start() > pos || node.end() < pos)
continue;
if (!match_node_index.has_value() || (node_span(node) <= node_span(m_nodes[match_node_index.value()])))
match_node_index = node_index;
}
return match_node_index;
}
Optional<Token> Parser::token_at(Position pos) const
{
auto index = index_of_token_at(pos);
if (!index.has_value())
return {};
return m_tokens[index.value()];
}
Optional<size_t> Parser::index_of_token_at(Position pos) const
{
for (size_t token_index = 0; token_index < m_tokens.size(); ++token_index) {
auto token = m_tokens[token_index];
if (token.start() > pos || token.end() < pos)
continue;
return token_index;
}
return {};
}
void Parser::print_tokens() const
{
for (auto& token : m_tokens) {
outln("{}", token.to_string());
}
}
Vector<Parser::TodoEntry> Parser::get_todo_entries() const
{
Vector<TodoEntry> ret;
for (auto& token : m_tokens) {
if (token.type() == Token::Type::Comment) {
if (token.text().contains("TODO")) {
ret.append({ token.text(), m_filename, token.start().line, token.start().column });
}
}
}
return ret;
}
NonnullRefPtr<StringLiteral> Parser::parse_string_literal(ASTNode& parent)
{
LOG_SCOPE();
Optional<size_t> start_token_index;
Optional<size_t> end_token_index;
while (!eof()) {
auto token = peek();
if (token.type() != Token::Type::DoubleQuotedString && token.type() != Token::Type::SingleQuotedString && token.type() != Token::Type::EscapeSequence) {
VERIFY(start_token_index.has_value());
end_token_index = m_state.token_index - 1;
break;
}
if (!start_token_index.has_value())
start_token_index = m_state.token_index;
consume();
}
// String was not terminated
if (!end_token_index.has_value()) {
end_token_index = m_tokens.size() - 1;
}
VERIFY(start_token_index.has_value());
VERIFY(end_token_index.has_value());
Token start_token = m_tokens[start_token_index.value()];
Token end_token = m_tokens[end_token_index.value()];
auto text = text_in_range(start_token.start(), end_token.end());
auto string_literal = create_ast_node<StringLiteral>(parent, start_token.start(), end_token.end());
string_literal->set_value(move(text));
return string_literal;
}
NonnullRefPtr<ReturnStatement> Parser::parse_return_statement(ASTNode& parent)
{
LOG_SCOPE();
auto return_statement = create_ast_node<ReturnStatement>(parent, position(), {});
consume(Token::Type::Keyword);
if (!peek(Token::Type::Semicolon).has_value()) {
return_statement->set_value(parse_expression(*return_statement));
}
return_statement->set_end(position());
return return_statement;
}
NonnullRefPtr<EnumDeclaration> Parser::parse_enum_declaration(ASTNode& parent)
{
LOG_SCOPE();
auto enum_decl = create_ast_node<EnumDeclaration>(parent, position(), {});
consume_keyword("enum");
if (match_keyword("class")) {
consume(Token::Type::Keyword);
enum_decl->set_type(EnumDeclaration::Type::EnumClass);
} else {
enum_decl->set_type(EnumDeclaration::Type::RegularEnum);
}
auto name_token = consume(Token::Type::Identifier);
enum_decl->set_name(text_of_token(name_token));
consume(Token::Type::LeftCurly);
while (!eof() && peek().type() != Token::Type::RightCurly) {
auto name = text_of_token(consume(Token::Type::Identifier));
RefPtr<Expression> value;
if (peek().type() == Token::Type::Equals) {
consume();
value = parse_expression(enum_decl);
}
enum_decl->add_entry(name, move(value));
if (peek().type() != Token::Type::Comma) {
break;
}
consume(Token::Type::Comma);
}
consume(Token::Type::RightCurly);
consume(Token::Type::Semicolon);
enum_decl->set_end(position());
return enum_decl;
}
Token Parser::consume_keyword(const String& keyword)
{
auto token = consume();
if (token.type() != Token::Type::Keyword) {
error(String::formatted("unexpected token: {}, expected Keyword", token.to_string()));
return token;
}
if (text_of_token(token) != keyword) {
error(String::formatted("unexpected keyword: {}, expected {}", text_of_token(token), keyword));
return token;
}
return token;
}
bool Parser::match_keyword(const String& keyword)
{
auto token = peek();
if (token.type() != Token::Type::Keyword) {
return false;
}
if (text_of_token(token) != keyword) {
return false;
}
return true;
}
NonnullRefPtr<StructOrClassDeclaration> Parser::parse_class_declaration(ASTNode& parent)
{
LOG_SCOPE();
auto type_token = consume(Token::Type::Keyword);
StructOrClassDeclaration::Type type {};
if (type_token.text() == "struct")
type = StructOrClassDeclaration::Type::Struct;
if (type_token.text() == "class")
type = StructOrClassDeclaration::Type::Class;
auto decl = create_ast_node<StructOrClassDeclaration>(parent, position(), {}, type);
auto name_token = consume(Token::Type::Identifier);
decl->set_name(text_of_token(name_token));
auto has_final = match_keyword("final");
// FIXME: Don't ignore this.
if (peek(has_final ? 1 : 0).type() == Token::Type::Colon) {
if (has_final)
consume();
do {
consume();
while (match_keyword("private") || match_keyword("public") || match_keyword("protected") || match_keyword("virtual"))
consume();
(void)parse_name(get_dummy_node());
} while (peek().type() == Token::Type::Comma);
}
consume(Token::Type::LeftCurly);
while (!eof() && peek().type() != Token::Type::RightCurly) {
decl->set_members(parse_class_members(*decl));
}
consume(Token::Type::RightCurly);
consume(Token::Type::Semicolon);
decl->set_end(position());
return decl;
}
NonnullRefPtr<BooleanLiteral> Parser::parse_boolean_literal(ASTNode& parent)
{
LOG_SCOPE();
auto token = consume(Token::Type::Keyword);
auto text = text_of_token(token);
// text == "true" || text == "false";
bool value = (text == "true");
return create_ast_node<BooleanLiteral>(parent, token.start(), token.end(), value);
}
bool Parser::match_boolean_literal()
{
auto token = peek();
if (token.type() != Token::Type::Keyword)
return false;
auto text = text_of_token(token);
return text == "true" || text == "false";
}
NonnullRefPtr<Type> Parser::parse_type(ASTNode& parent)
{
LOG_SCOPE();
if (!match_named_type()) {
error("expected named named_type");
auto token = consume();
return create_ast_node<NamedType>(parent, token.start(), token.end());
}
auto named_type = create_ast_node<NamedType>(parent, position(), {});
auto qualifiers = parse_type_qualifiers();
named_type->set_qualifiers(move(qualifiers));
if (match_keyword("auto")) {
consume(Token::Type::Keyword);
named_type->set_auto(true);
auto original_qualifiers = named_type->qualifiers();
original_qualifiers.extend(parse_type_qualifiers());
named_type->set_qualifiers(move(original_qualifiers));
named_type->set_end(position());
return named_type;
}
if (match_keyword("struct")) {
consume(Token::Type::Keyword); // Consume struct prefix
}
if (!match_name()) {
named_type->set_end(position());
error(String::formatted("expected name instead of: {}", peek().text()));
return named_type;
}
named_type->set_name(parse_name(*named_type));
auto original_qualifiers = named_type->qualifiers();
original_qualifiers.extend(parse_type_qualifiers());
named_type->set_qualifiers(move(original_qualifiers));
NonnullRefPtr<Type> type = named_type;
while (!eof() && peek().type() == Token::Type::Asterisk) {
type->set_end(position());
auto asterisk = consume();
auto ptr = create_ast_node<Pointer>(parent, type->start(), asterisk.end());
type->set_parent(*ptr);
ptr->set_pointee(type);
ptr->set_qualifiers(parse_type_qualifiers());
ptr->set_end(position());
type = ptr;
}
if (!eof() && (peek().type() == Token::Type::And || peek().type() == Token::Type::AndAnd)) {
type->set_end(position());
auto ref_token = consume();
auto ref = create_ast_node<Reference>(parent, type->start(), ref_token.end(), ref_token.type() == Token::Type::And ? Reference::Kind::Lvalue : Reference::Kind::Rvalue);
type->set_parent(*ref);
ref->set_referenced_type(type);
ref->set_end(position());
type = ref;
}
if (peek().type() == Token::Type::LeftParen) {
consume();
auto fn_type = create_ast_node<FunctionType>(parent, type->start(), position());
fn_type->set_return_type(*type);
type->set_parent(*fn_type);
if (auto parameters = parse_parameter_list(*type); parameters.has_value())
fn_type->set_parameters(parameters.release_value());
consume(Token::Type::RightParen);
type = fn_type;
}
type->set_end(previous_token_end());
return type;
}
NonnullRefPtr<ForStatement> Parser::parse_for_statement(ASTNode& parent)
{
LOG_SCOPE();
auto for_statement = create_ast_node<ForStatement>(parent, position(), {});
consume(Token::Type::Keyword);
consume(Token::Type::LeftParen);
if (peek().type() != Token::Type::Semicolon)
for_statement->set_init(parse_variable_declaration(*for_statement, false));
consume(Token::Type::Semicolon);
if (peek().type() != Token::Type::Semicolon)
for_statement->set_test(parse_expression(*for_statement));
consume(Token::Type::Semicolon);
if (peek().type() != Token::Type::RightParen)
for_statement->set_update(parse_expression(*for_statement));
consume(Token::Type::RightParen);
for_statement->set_body(parse_statement(*for_statement));
for_statement->set_end(for_statement->body()->end());
return for_statement;
}
NonnullRefPtr<IfStatement> Parser::parse_if_statement(ASTNode& parent)
{
LOG_SCOPE();
auto if_statement = create_ast_node<IfStatement>(parent, position(), {});
consume(Token::Type::Keyword);
consume(Token::Type::LeftParen);
if_statement->set_predicate(parse_expression(*if_statement));
consume(Token::Type::RightParen);
if_statement->set_then_statement(parse_statement(*if_statement));
if (match_keyword("else")) {
consume(Token::Type::Keyword);
if_statement->set_else_statement(parse_statement(*if_statement));
if_statement->set_end(if_statement->else_statement()->end());
} else {
if_statement->set_end(if_statement->then_statement()->end());
}
return if_statement;
}
Vector<StringView> Parser::parse_type_qualifiers()
{
LOG_SCOPE();
Vector<StringView> qualifiers;
while (!eof()) {
auto token = peek();
if (token.type() != Token::Type::Keyword)
break;
auto text = text_of_token(token);
if (text == "static" || text == "const" || text == "extern") {
qualifiers.append(text);
consume();
} else {
break;
}
}
return qualifiers;
}
Vector<StringView> Parser::parse_function_qualifiers()
{
LOG_SCOPE();
Vector<StringView> qualifiers;
while (!eof()) {
auto token = peek();
if (token.type() != Token::Type::Keyword)
break;
auto text = text_of_token(token);
if (text == "static" || text == "inline" || text == "extern" || text == "virtual") {
qualifiers.append(text);
consume();
} else {
break;
}
}
return qualifiers;
}
bool Parser::match_attribute_specification()
{
return text_of_token(peek()) == "__attribute__";
}
void Parser::consume_attribute_specification()
{
consume(); // __attribute__
consume(Token::Type::LeftParen);
size_t left_count = 1;
while (!eof()) {
auto token = consume();
if (token.type() == Token::Type::LeftParen) {
++left_count;
}
if (token.type() == Token::Type::RightParen) {
--left_count;
}
if (left_count == 0)
return;
}
}
bool Parser::match_ellipsis()
{
if (m_state.token_index > m_tokens.size() - 3)
return false;
return peek().type() == Token::Type::Dot && peek(1).type() == Token::Type::Dot && peek(2).type() == Token::Type::Dot;
}
NonnullRefPtr<NamespaceDeclaration> Parser::parse_namespace_declaration(ASTNode& parent, bool is_nested_namespace)
{
auto namespace_decl = create_ast_node<NamespaceDeclaration>(parent, position(), {});
if (!is_nested_namespace)
consume(Token::Type::Keyword);
auto name_token = consume(Token::Type::Identifier);
namespace_decl->set_name(name_token.text());
if (peek().type() == Token::Type::ColonColon) {
consume(Token::Type::ColonColon);
namespace_decl->add_declaration(parse_namespace_declaration(*namespace_decl, true));
namespace_decl->set_end(position());
return namespace_decl;
}
consume(Token::Type::LeftCurly);
while (!eof() && peek().type() != Token::Type::RightCurly) {
auto declaration = parse_single_declaration_in_translation_unit(*namespace_decl);
if (declaration) {
namespace_decl->add_declaration(declaration.release_nonnull());
} else {
error("unexpected token");
consume();
}
}
consume(Token::Type::RightCurly);
namespace_decl->set_end(position());
return namespace_decl;
}
bool Parser::match_name()
{
auto type = peek().type();
return type == Token::Type::Identifier || type == Token::Type::KnownType;
}
NonnullRefPtr<Name> Parser::parse_name(ASTNode& parent)
{
LOG_SCOPE();
NonnullRefPtr<Name> name_node = create_ast_node<Name>(parent, position(), {});
while (!eof() && (peek().type() == Token::Type::Identifier || peek().type() == Token::Type::KnownType) && peek(1).type() == Token::Type::ColonColon) {
auto token = consume();
name_node->add_to_scope(create_ast_node<Identifier>(*name_node, token.start(), token.end(), token.text()));
consume(Token::Type::ColonColon);
}
if (peek().type() == Token::Type::Identifier || peek().type() == Token::Type::KnownType) {
auto token = consume();
name_node->set_name(create_ast_node<Identifier>(*name_node, token.start(), token.end(), token.text()));
} else {
name_node->set_end(position());
return name_node;
}
if (match_template_arguments()) {
consume(Token::Type::Less);
NonnullRefPtr<TemplatizedName> templatized_name = create_ast_node<TemplatizedName>(parent, name_node->start(), {});
templatized_name->set_name(name_node->name());
templatized_name->set_scope(name_node->scope());
name_node->set_end(position());
name_node = templatized_name;
while (peek().type() != Token::Type::Greater && !eof()) {
templatized_name->add_template_argument(parse_type(*templatized_name));
if (peek().type() == Token::Type::Comma)
consume(Token::Type::Comma);
}
consume(Token::Type::Greater);
}
name_node->set_end(previous_token_end());
return name_node;
}
bool Parser::match_cpp_cast_expression()
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
auto token = consume();
if (token.type() != Token::Type::Keyword)
return false;
auto text = token.text();
if (text == "static_cast" || text == "reinterpret_cast" || text == "dynamic_cast" || text == "const_cast")
return true;
return false;
}
bool Parser::match_c_style_cast_expression()
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
if (consume().type() != Token::Type::LeftParen)
return false;
if (!match_type())
return false;
(void)parse_type(get_dummy_node());
if (consume().type() != Token::Type::RightParen)
return false;
if (!match_expression())
return false;
return true;
}
NonnullRefPtr<CStyleCastExpression> Parser::parse_c_style_cast_expression(ASTNode& parent)
{
auto parse_exp = create_ast_node<CStyleCastExpression>(parent, position(), {});
consume(Token::Type::LeftParen);
parse_exp->set_type(parse_type(*parse_exp));
consume(Token::Type::RightParen);
parse_exp->set_expression(parse_expression(*parse_exp));
parse_exp->set_end(position());
return parse_exp;
}
NonnullRefPtr<CppCastExpression> Parser::parse_cpp_cast_expression(ASTNode& parent)
{
auto cast_expression = create_ast_node<CppCastExpression>(parent, position(), {});
cast_expression->set_cast_type(consume(Token::Type::Keyword).text());
consume(Token::Type::Less);
cast_expression->set_type(parse_type(*cast_expression));
consume(Token::Type::Greater);
consume(Token::Type::LeftParen);
cast_expression->set_expression(parse_expression(*cast_expression));
consume(Token::Type::RightParen);
cast_expression->set_end(position());
return cast_expression;
}
bool Parser::match_sizeof_expression()
{
return match_keyword("sizeof");
}
NonnullRefPtr<SizeofExpression> Parser::parse_sizeof_expression(ASTNode& parent)
{
auto exp = create_ast_node<SizeofExpression>(parent, position(), {});
consume(Token::Type::Keyword);
consume(Token::Type::LeftParen);
exp->set_type(parse_type(parent));
consume(Token::Type::RightParen);
exp->set_end(position());
return exp;
}
bool Parser::match_braced_init_list()
{
return match(Token::Type::LeftCurly);
}
NonnullRefPtr<BracedInitList> Parser::parse_braced_init_list(ASTNode& parent)
{
auto init_list = create_ast_node<BracedInitList>(parent, position(), {});
consume(Token::Type::LeftCurly);
while (!eof() && peek().type() != Token::Type::RightCurly) {
init_list->add_expression(parse_expression(*init_list));
}
consume(Token::Type::RightCurly);
init_list->set_end(position());
return init_list;
}
NonnullRefPtrVector<Declaration> Parser::parse_class_members(StructOrClassDeclaration& parent)
{
auto class_name = parent.name();
NonnullRefPtrVector<Declaration> members;
while (!eof() && peek().type() != Token::Type::RightCurly) {
if (match_access_specifier())
consume_access_specifier(); // FIXME: Do not ignore access specifiers
auto member_type = match_class_member(class_name);
if (member_type.has_value()) {
members.append(parse_declaration(parent, member_type.value()));
} else {
error("Expected class member");
consume();
}
}
return members;
}
bool Parser::match_access_specifier()
{
if (peek(1).type() != Token::Type::Colon)
return false;
return match_keyword("private") || match_keyword("protected") || match_keyword("public");
}
void Parser::consume_access_specifier()
{
consume(Token::Type::Keyword);
consume(Token::Type::Colon);
}
bool Parser::match_constructor(StringView class_name)
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
auto token = consume();
if (token.text() != class_name)
return false;
if (!peek(Token::Type::LeftParen).has_value())
return false;
consume();
while (consume().type() != Token::Type::RightParen && !eof()) { };
return (peek(Token::Type::Semicolon).has_value() || peek(Token::Type::LeftCurly).has_value());
}
bool Parser::match_destructor(StringView class_name)
{
save_state();
ScopeGuard state_guard = [this] { load_state(); };
if (match_keyword("virtual"))
consume();
if (!match(Token::Type::Tilde))
return false;
consume();
auto token = peek();
if (token.text() != class_name)
return false;
consume();
if (!peek(Token::Type::LeftParen).has_value())
return false;
consume();
while (consume().type() != Token::Type::RightParen && !eof()) { };
if (match_keyword("override"))
consume();
return (peek(Token::Type::Semicolon).has_value() || peek(Token::Type::LeftCurly).has_value());
}
void Parser::parse_constructor_or_destructor_impl(FunctionDeclaration& func, CtorOrDtor type)
{
if (type == CtorOrDtor::Dtor) {
if (match_keyword("virtual"))
func.set_qualifiers({ consume().text() });
consume(Token::Type::Tilde);
}
auto name_token = consume();
if (name_token.type() != Token::Type::Identifier && name_token.type() != Token::Type::KnownType) {
error("Unexpected constructor name");
}
func.set_name(name_token.text());
consume(Token::Type::LeftParen);
auto parameters = parse_parameter_list(func);
if (parameters.has_value()) {
if (type == CtorOrDtor::Dtor && !parameters->is_empty())
error("Destructor declaration that takes parameters");
else
func.set_parameters(parameters.value());
}
consume(Token::Type::RightParen);
if (type == CtorOrDtor::Dtor && match_keyword("override"))
consume();
// TODO: Parse =default, =delete.
RefPtr<FunctionDefinition> body;
Position ctor_end {};
if (peek(Token::Type::LeftCurly).has_value()) {
body = parse_function_definition(func);
ctor_end = body->end();
} else {
ctor_end = position();
if (match_attribute_specification())
consume_attribute_specification(); // we don't use the value of __attribute__
consume(Token::Type::Semicolon);
}
func.set_definition(move(body));
func.set_end(ctor_end);
}
NonnullRefPtr<Constructor> Parser::parse_constructor(ASTNode& parent)
{
auto ctor = create_ast_node<Constructor>(parent, position(), {});
parse_constructor_or_destructor_impl(*ctor, CtorOrDtor::Ctor);
return ctor;
}
NonnullRefPtr<Destructor> Parser::parse_destructor(ASTNode& parent)
{
auto ctor = create_ast_node<Destructor>(parent, position(), {});
parse_constructor_or_destructor_impl(*ctor, CtorOrDtor::Dtor);
return ctor;
}
}