diff --git a/AK/ScopeLogger.h b/AK/ScopeLogger.h new file mode 100644 index 0000000000..748fbd11b2 --- /dev/null +++ b/AK/ScopeLogger.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020, Denis Campredon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include + +#ifdef DEBUG_SPAM + +namespace AK { +class ScopeLogger { +public: + ScopeLogger(StringView&& fun) + : m_fun(fun) + { + StringBuilder sb; + + for (auto indent = m_depth++; indent > 0; indent--) + sb.append(' '); + dbgln("\033[1;{}m{}entering {}\033[0m", m_depth % 8 + 30, sb.to_string(), m_fun); + } + ~ScopeLogger() + { + StringBuilder sb; + + for (auto indent = --m_depth; indent > 0; indent--) + sb.append(' '); + dbgln("\033[1;{}m{}leaving {}\033[0m", (m_depth + 1) % 8 + 30, sb.to_string(), m_fun); + } + +private: + static inline size_t m_depth = 0; + StringView m_fun; +}; +} + +using AK::ScopeLogger; +# define SCOPE_LOGGER() auto tmp##__COUNTER__ = ScopeLogger(__PRETTY_FUNCTION__); + +#else +# define SCOPE_LOGGER() +#endif diff --git a/Meta/CMake/all_the_debug_macros.cmake b/Meta/CMake/all_the_debug_macros.cmake index 2e54cb1fc7..2d93233773 100644 --- a/Meta/CMake/all_the_debug_macros.cmake +++ b/Meta/CMake/all_the_debug_macros.cmake @@ -159,6 +159,7 @@ set(UPDATE_COALESCING_DEBUG ON) set(VOLATILE_PAGE_RANGES_DEBUG ON) set(WSMESSAGELOOP_DEBUG ON) set(GPT_DEBUG ON) +set(CPP_DEBUG ON) # False positive: DEBUG is a flag but it works differently. # set(DEBUG ON) diff --git a/Userland/Libraries/LibCpp/AST.cpp b/Userland/Libraries/LibCpp/AST.cpp new file mode 100644 index 0000000000..a2b6db8cfe --- /dev/null +++ b/Userland/Libraries/LibCpp/AST.cpp @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2021, Itamar S. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "AST.h" +#include "AK/LogStream.h" + +namespace Cpp { + +static void print_indent(int indent) +{ + for (int i = 0; i < indent * 2; ++i) + dbgprintf(" "); +} + +void ASTNode::dump(size_t indent) const +{ + print_indent(indent); + dbgprintf("%s[%lu:%lu->%lu:%lu]\n", class_name(), start().line, start().column, end().line, end().column); +} + +void TranslationUnit::dump(size_t indent) const +{ + ASTNode::dump(indent); + for (const auto& child : m_children) { + child.dump(indent + 1); + } +} + +void FunctionDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_return_type->dump(indent + 1); + if (!m_name.is_null()) { + print_indent(indent + 1); + dbgprintf("%s\n", m_name.to_string().characters()); + } + print_indent(indent + 1); + dbgprintf("(\n"); + for (const auto& arg : m_parameters) { + arg.dump(indent + 1); + } + print_indent(indent + 1); + dbgprintf(")\n"); + if (!m_definition.is_null()) { + m_definition->dump(indent + 1); + } +} + +NonnullRefPtrVector FunctionDeclaration::declarations() const +{ + NonnullRefPtrVector declarations; + for (auto& arg : m_parameters) { + declarations.append(arg); + } + return declarations; +} + +void 
Type::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent + 1); + dbgprintf("%s\n", m_name.to_string().characters()); +} + +void Parameter::dump(size_t indent) const +{ + ASTNode::dump(indent); + if (!m_name.is_null()) { + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); + } + m_type->dump(indent + 1); + // print_indent(indent); + // dbgprintf("%s [%s]\n", m_name.is_null() ? "" : m_name.to_string().characters(), m_type->name().to_string().characters()); +} + +void FunctionDefinition::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("{\n"); + for (const auto& statement : m_statements) { + statement.dump(indent + 1); + } + print_indent(indent); + dbgprintf("}\n"); +} + +NonnullRefPtrVector FunctionDefinition::declarations() const +{ + NonnullRefPtrVector declarations; + for (auto& statement : m_statements) { + declarations.append(statement.declarations()); + } + return declarations; +} + +void VariableDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_type->dump(indent + 1); + print_indent(indent + 1); + dbgprintf("%s\n", m_name.to_string().characters()); + if (m_initial_value) + m_initial_value->dump(indent + 1); +} + +void Identifier::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); +} + +void NumericLiteral::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_value.to_string().characters()); +} + +void BinaryExpression::dump(size_t indent) const +{ + ASTNode::dump(indent); + + const char* op_string = nullptr; + switch (m_op) { + case BinaryOp::Addition: + op_string = "+"; + break; + case BinaryOp::Subtraction: + op_string = "-"; + break; + case BinaryOp::Multiplication: + op_string = "*"; + break; + case BinaryOp::Division: + op_string = "/"; + break; + case BinaryOp::Modulo: + op_string = "%"; + break; + case 
BinaryOp::GreaterThan: + op_string = ">"; + break; + case BinaryOp::GreaterThanEquals: + op_string = ">="; + break; + case BinaryOp::LessThan: + op_string = "<"; + break; + case BinaryOp::LessThanEquals: + op_string = "<="; + break; + case BinaryOp::BitwiseAnd: + op_string = "&"; + break; + case BinaryOp::BitwiseOr: + op_string = "|"; + break; + case BinaryOp::BitwiseXor: + op_string = "^"; + break; + case BinaryOp::LeftShift: + op_string = "<<"; + break; + case BinaryOp::RightShift: + op_string = ">>"; + break; + } + + m_lhs->dump(indent + 1); + print_indent(indent + 1); + ASSERT(op_string); + dbgprintf("%s\n", op_string); + m_rhs->dump(indent + 1); +} + +void AssignmentExpression::dump(size_t indent) const +{ + ASTNode::dump(indent); + + const char* op_string = nullptr; + switch (m_op) { + case AssignmentOp::Assignment: + op_string = "="; + break; + case AssignmentOp::AdditionAssignment: + op_string = "+="; + break; + case AssignmentOp::SubtractionAssignment: + op_string = "-="; + break; + } + + m_lhs->dump(indent + 1); + print_indent(indent + 1); + ASSERT(op_string); + dbgprintf("%s\n", op_string); + m_rhs->dump(indent + 1); +} + +void FunctionCall::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); + for (const auto& arg : m_arguments) { + arg.dump(indent + 1); + } +} + +void StringLiteral::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent + 1); + dbgprintf("%s\n", m_value.to_string().characters()); +} + +void ReturnStatement::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_value->dump(indent + 1); +} + +void EnumDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); + for (auto& entry : m_entries) { + print_indent(indent + 1); + dbgprintf("%s\n", entry.to_string().characters()); + } +} + +void StructOrClassDeclaration::dump(size_t indent) const +{ + 
ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); + for (auto& member : m_members) { + member.dump(indent + 1); + } +} + +void MemberDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_type->dump(indent + 1); + print_indent(indent + 1); + dbgprintf("%s\n", m_name.to_string().characters()); + if (m_initial_value) { + m_initial_value->dump(indent + 2); + } +} + +void UnaryExpression::dump(size_t indent) const +{ + ASTNode::dump(indent); + + const char* op_string = nullptr; + switch (m_op) { + case UnaryOp::BitwiseNot: + op_string = "~"; + break; + case UnaryOp::Not: + op_string = "!"; + break; + case UnaryOp::Plus: + op_string = "+"; + break; + case UnaryOp::Minus: + op_string = "-"; + break; + case UnaryOp::PlusPlus: + op_string = "++"; + break; + default: + op_string = ""; + } + + ASSERT(op_string); + print_indent(indent + 1); + dbgprintf("%s\n", op_string); + m_lhs->dump(indent + 1); +} + +void BooleanLiteral::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent + 1); + dbgprintf("%s\n", m_value ? 
"true" : "false"); +} + +void Pointer::dump(size_t indent) const +{ + ASTNode::dump(indent); + if (!m_pointee.is_null()) { + m_pointee->dump(indent + 1); + } +} + +void MemberExpression::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_object->dump(indent + 1); + m_property->dump(indent + 1); +} + +void BlockStatement::dump(size_t indent) const +{ + ASTNode::dump(indent); + for (auto& statement : m_statements) { + statement.dump(indent + 1); + } +} + +void ForStatement::dump(size_t indent) const +{ + ASTNode::dump(indent); + if (m_init) + m_init->dump(indent + 1); + if (m_test) + m_test->dump(indent + 1); + if (m_update) + m_update->dump(indent + 1); + if (m_body) + m_body->dump(indent + 1); +} + +NonnullRefPtrVector Statement::declarations() const +{ + if (is_declaration()) { + NonnullRefPtrVector vec; + const auto& decl = static_cast(*this); + vec.empend(const_cast(decl)); + return vec; + } + return {}; +} + +NonnullRefPtrVector ForStatement::declarations() const +{ + auto declarations = m_init->declarations(); + declarations.append(m_body->declarations()); + return declarations; +} + +NonnullRefPtrVector BlockStatement::declarations() const +{ + NonnullRefPtrVector declarations; + for (auto& statement : m_statements) { + declarations.append(statement.declarations()); + } + return declarations; +} + +} diff --git a/Userland/Libraries/LibCpp/AST.h b/Userland/Libraries/LibCpp/AST.h new file mode 100644 index 0000000000..5f31b0f2e4 --- /dev/null +++ b/Userland/Libraries/LibCpp/AST.h @@ -0,0 +1,585 @@ +/* + * Copyright (c) 2021, Itamar S. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include "Applications/Piano/Music.h" +#include +#include +#include +#include +#include +#include + +namespace Cpp { + +class ASTNode; +class TranslationUnit; +class Declaration; +class FunctionDefinition; +class Type; +class Parameter; +class Statement; + +class ASTNode : public RefCounted { +public: + virtual ~ASTNode() = default; + virtual const char* class_name() const = 0; + virtual void dump(size_t indent) const; + + ASTNode* parent() const { return m_parent; } + Position start() const + { + ASSERT(m_start.has_value()); + return m_start.value(); + } + Position end() const + { + ASSERT(m_end.has_value()); + return m_end.value(); + } + void set_end(const Position& end) { m_end = end; } + void set_parent(ASTNode& parent) { m_parent = &parent; } + + virtual NonnullRefPtrVector declarations() const { return {}; } + + virtual bool is_identifier() const { return false; } + virtual bool is_member_expression() const { return false; } + virtual bool is_variable_or_parameter_declaration() const { return false; } + +protected: + ASTNode(ASTNode* parent, Optional start, Optional end) + : m_parent(parent) + , m_start(start) + , m_end(end) + { + } + +private: + ASTNode* m_parent { nullptr }; + Optional m_start; + Optional m_end; +}; + +class TranslationUnit : public ASTNode { + +public: + virtual ~TranslationUnit() override = default; + const NonnullRefPtrVector& children() const { return m_children; } + virtual const char* class_name() const override { return "TranslationUnit"; } + virtual void dump(size_t indent) const override; + void append(NonnullRefPtr child) + { + m_children.append(move(child)); + } + virtual NonnullRefPtrVector declarations() const override { return m_children; } + +public: + TranslationUnit(ASTNode* parent, Optional start, Optional end) + : ASTNode(parent, start, end) + { + } + +private: + NonnullRefPtrVector m_children; +}; + +class Statement : public ASTNode { +public: + virtual ~Statement() override = default; + virtual 
const char* class_name() const override { return "Statement"; } + + virtual bool is_declaration() const { return false; } + virtual NonnullRefPtrVector declarations() const override; + +protected: + Statement(ASTNode* parent, Optional start, Optional end) + : ASTNode(parent, start, end) + { + } +}; + +class Declaration : public Statement { + +public: + virtual bool is_declaration() const override { return true; } + virtual bool is_variable_declaration() const { return false; } + virtual bool is_parameter() const { return false; } + virtual bool is_struct_or_class() const { return false; } + +protected: + Declaration(ASTNode* parent, Optional start, Optional end) + : Statement(parent, start, end) + { + } +}; + +class InvalidDeclaration : public Declaration { + +public: + virtual ~InvalidDeclaration() override = default; + virtual const char* class_name() const override { return "InvalidDeclaration"; } + InvalidDeclaration(ASTNode* parent, Optional start, Optional end) + : Declaration(parent, start, end) + { + } +}; + +class FunctionDeclaration : public Declaration { +public: + virtual ~FunctionDeclaration() override = default; + virtual const char* class_name() const override { return "FunctionDeclaration"; } + virtual void dump(size_t indent) const override; + const StringView& name() const { return m_name; } + RefPtr definition() { return m_definition; } + + FunctionDeclaration(ASTNode* parent, Optional start, Optional end) + : Declaration(parent, start, end) + { + } + + virtual NonnullRefPtrVector declarations() const override; + + StringView m_name; + RefPtr m_return_type; + NonnullRefPtrVector m_parameters; + RefPtr m_definition; +}; + +class VariableOrParameterDeclaration : public Declaration { +public: + virtual ~VariableOrParameterDeclaration() override = default; + virtual bool is_variable_or_parameter_declaration() const override { return true; } + + StringView m_name; + RefPtr m_type; + +protected: + VariableOrParameterDeclaration(ASTNode* parent, 
Optional start, Optional end) + : Declaration(parent, start, end) + { + } +}; + +class Parameter : public VariableOrParameterDeclaration { +public: + virtual ~Parameter() override = default; + virtual const char* class_name() const override { return "Parameter"; } + virtual void dump(size_t indent) const override; + + Parameter(ASTNode* parent, Optional start, Optional end, StringView name) + : VariableOrParameterDeclaration(parent, start, end) + { + m_name = name; + } + + virtual bool is_parameter() const override { return true; } +}; + +class Type : public ASTNode { +public: + virtual ~Type() override = default; + virtual const char* class_name() const override { return "Type"; } + const StringView& name() const { return m_name; } + virtual void dump(size_t indent) const override; + + Type(ASTNode* parent, Optional start, Optional end, StringView name) + : ASTNode(parent, start, end) + , m_name(name) + { + } + + StringView m_name; +}; + +class Pointer : public Type { +public: + virtual ~Pointer() override = default; + virtual const char* class_name() const override { return "Pointer"; } + virtual void dump(size_t indent) const override; + + Pointer(ASTNode* parent, Optional start, Optional end) + : Type(parent, start, end, {}) + { + } + + RefPtr m_pointee; +}; + +class FunctionDefinition : public ASTNode { +public: + virtual ~FunctionDefinition() override = default; + virtual const char* class_name() const override { return "FunctionDefinition"; } + NonnullRefPtrVector& statements() { return m_statements; } + virtual void dump(size_t indent) const override; + + FunctionDefinition(ASTNode* parent, Optional start, Optional end) + : ASTNode(parent, start, end) + { + } + + virtual NonnullRefPtrVector declarations() const override; + + NonnullRefPtrVector m_statements; +}; + +class InvalidStatement : public Statement { +public: + virtual ~InvalidStatement() override = default; + virtual const char* class_name() const override { return "InvalidStatement"; } + 
InvalidStatement(ASTNode* parent, Optional start, Optional end) + : Statement(parent, start, end) + { + } +}; + +class Expression : public Statement { +public: + virtual ~Expression() override = default; + virtual const char* class_name() const override { return "Expression"; } + +protected: + Expression(ASTNode* parent, Optional start, Optional end) + : Statement(parent, start, end) + { + } +}; + +class InvalidExpression : public Expression { +public: + virtual ~InvalidExpression() override = default; + virtual const char* class_name() const override { return "InvalidExpression"; } + InvalidExpression(ASTNode* parent, Optional start, Optional end) + : Expression(parent, start, end) + { + } +}; + +class VariableDeclaration : public VariableOrParameterDeclaration { +public: + virtual ~VariableDeclaration() override = default; + virtual const char* class_name() const override { return "VariableDeclaration"; } + virtual void dump(size_t indent) const override; + + VariableDeclaration(ASTNode* parent, Optional start, Optional end) + : VariableOrParameterDeclaration(parent, start, end) + { + } + + virtual bool is_variable_declaration() const override { return true; } + + RefPtr m_initial_value; +}; + +class Identifier : public Expression { +public: + virtual ~Identifier() override = default; + virtual const char* class_name() const override { return "Identifier"; } + virtual void dump(size_t indent) const override; + + Identifier(ASTNode* parent, Optional start, Optional end, StringView name) + : Expression(parent, start, end) + , m_name(name) + { + } + Identifier(ASTNode* parent, Optional start, Optional end) + : Identifier(parent, start, end, {}) + { + } + + virtual bool is_identifier() const override { return true; } + + StringView m_name; +}; + +class NumericLiteral : public Expression { +public: + virtual ~NumericLiteral() override = default; + virtual const char* class_name() const override { return "NumricLiteral"; } + virtual void dump(size_t indent) const 
override; + + NumericLiteral(ASTNode* parent, Optional start, Optional end, StringView value) + : Expression(parent, start, end) + , m_value(value) + { + } + + StringView m_value; +}; + +class BooleanLiteral : public Expression { +public: + virtual ~BooleanLiteral() override = default; + virtual const char* class_name() const override { return "BooleanLiteral"; } + virtual void dump(size_t indent) const override; + + BooleanLiteral(ASTNode* parent, Optional start, Optional end, bool value) + : Expression(parent, start, end) + , m_value(value) + { + } + + bool m_value; +}; + +enum class BinaryOp { + Addition, + Subtraction, + Multiplication, + Division, + Modulo, + GreaterThan, + GreaterThanEquals, + LessThan, + LessThanEquals, + BitwiseAnd, + BitwiseOr, + BitwiseXor, + LeftShift, + RightShift, +}; + +class BinaryExpression : public Expression { +public: + BinaryExpression(ASTNode* parent, Optional start, Optional end) + : Expression(parent, start, end) + { + } + + virtual ~BinaryExpression() override = default; + virtual const char* class_name() const override { return "BinaryExpression"; } + virtual void dump(size_t indent) const override; + + BinaryOp m_op; + RefPtr m_lhs; + RefPtr m_rhs; +}; + +enum class AssignmentOp { + Assignment, + AdditionAssignment, + SubtractionAssignment, +}; + +class AssignmentExpression : public Expression { +public: + AssignmentExpression(ASTNode* parent, Optional start, Optional end) + : Expression(parent, start, end) + { + } + + virtual ~AssignmentExpression() override = default; + virtual const char* class_name() const override { return "AssignmentExpression"; } + virtual void dump(size_t indent) const override; + + AssignmentOp m_op; + RefPtr m_lhs; + RefPtr m_rhs; +}; + +class FunctionCall final : public Expression { +public: + FunctionCall(ASTNode* parent, Optional start, Optional end) + : Expression(parent, start, end) + { + } + + ~FunctionCall() override = default; + virtual const char* class_name() const override { return 
"FunctionCall"; } + virtual void dump(size_t indent) const override; + + StringView m_name; + NonnullRefPtrVector m_arguments; +}; + +class StringLiteral final : public Expression { +public: + StringLiteral(ASTNode* parent, Optional start, Optional end) + : Expression(parent, start, end) + { + } + + ~StringLiteral() override = default; + virtual const char* class_name() const override { return "StringLiteral"; } + virtual void dump(size_t indent) const override; + + StringView m_value; +}; + +class ReturnStatement : public Statement { +public: + virtual ~ReturnStatement() override = default; + virtual const char* class_name() const override { return "ReturnStatement"; } + + ReturnStatement(ASTNode* parent, Optional start, Optional end) + : Statement(parent, start, end) + { + } + virtual void dump(size_t indent) const override; + + RefPtr m_value; +}; + +class EnumDeclaration : public Declaration { +public: + virtual ~EnumDeclaration() override = default; + virtual const char* class_name() const override { return "EnumDeclaration"; } + virtual void dump(size_t indent) const override; + + EnumDeclaration(ASTNode* parent, Optional start, Optional end) + : Declaration(parent, start, end) + { + } + + StringView m_name; + Vector m_entries; +}; + +class MemberDeclaration : public Declaration { +public: + virtual ~MemberDeclaration() override = default; + virtual const char* class_name() const override { return "MemberDeclaration"; } + virtual void dump(size_t indent) const override; + + MemberDeclaration(ASTNode* parent, Optional start, Optional end) + : Declaration(parent, start, end) + { + } + + RefPtr m_type; + StringView m_name; + RefPtr m_initial_value; +}; + +class StructOrClassDeclaration : public Declaration { +public: + virtual ~StructOrClassDeclaration() override = default; + virtual const char* class_name() const override { return "StructOrClassDeclaration"; } + virtual void dump(size_t indent) const override; + virtual bool is_struct_or_class() const override 
{ return true; } + + enum class Type { + Struct, + Class + }; + + StructOrClassDeclaration(ASTNode* parent, Optional start, Optional end, StructOrClassDeclaration::Type type) + : Declaration(parent, start, end) + , m_type(type) + { + } + + StructOrClassDeclaration::Type m_type; + StringView m_name; + NonnullRefPtrVector m_members; +}; + +enum class UnaryOp { + Invalid, + BitwiseNot, + Not, + Plus, + Minus, + PlusPlus, +}; + +class UnaryExpression : public Expression { +public: + UnaryExpression(ASTNode* parent, Optional start, Optional end) + : Expression(parent, start, end) + { + } + + virtual ~UnaryExpression() override = default; + virtual const char* class_name() const override { return "UnaryExpression"; } + virtual void dump(size_t indent) const override; + + UnaryOp m_op; + RefPtr m_lhs; +}; + +class MemberExpression : public Expression { +public: + MemberExpression(ASTNode* parent, Optional start, Optional end) + : Expression(parent, start, end) + { + } + + virtual ~MemberExpression() override = default; + virtual const char* class_name() const override { return "MemberExpression"; } + virtual void dump(size_t indent) const override; + virtual bool is_member_expression() const override { return true; } + + RefPtr m_object; + RefPtr m_property; +}; + +class ForStatement : public Statement { +public: + ForStatement(ASTNode* parent, Optional start, Optional end) + : Statement(parent, start, end) + { + } + + virtual ~ForStatement() override = default; + virtual const char* class_name() const override { return "ForStatement"; } + virtual void dump(size_t indent) const override; + + virtual NonnullRefPtrVector declarations() const override; + + RefPtr m_init; + RefPtr m_test; + RefPtr m_update; + RefPtr m_body; +}; + +class BlockStatement final : public Statement { +public: + BlockStatement(ASTNode* parent, Optional start, Optional end) + : Statement(parent, start, end) + { + } + + virtual ~BlockStatement() override = default; + virtual const char* class_name() 
const override { return "BlockStatement"; } + virtual void dump(size_t indent) const override; + + virtual NonnullRefPtrVector declarations() const override; + + NonnullRefPtrVector m_statements; +}; + +class Comment final : public Statement { +public: + Comment(ASTNode* parent, Optional start, Optional end) + : Statement(parent, start, end) + { + } + + virtual ~Comment() override = default; + virtual const char* class_name() const override { return "Comment"; } +}; +} diff --git a/Userland/Libraries/LibCpp/CMakeLists.txt b/Userland/Libraries/LibCpp/CMakeLists.txt index f9e022bddd..73eaf2bb4d 100644 --- a/Userland/Libraries/LibCpp/CMakeLists.txt +++ b/Userland/Libraries/LibCpp/CMakeLists.txt @@ -1,5 +1,7 @@ set(SOURCES + AST.cpp Lexer.cpp + Parser.cpp ) serenity_lib(LibCpp cpp) diff --git a/Userland/Libraries/LibCpp/Lexer.cpp b/Userland/Libraries/LibCpp/Lexer.cpp index 831822e5a5..5273f87dd4 100644 --- a/Userland/Libraries/LibCpp/Lexer.cpp +++ b/Userland/Libraries/LibCpp/Lexer.cpp @@ -581,12 +581,13 @@ Vector Lexer::lex() commit_token(Token::Type::IncludePath); begin_token(); } + } else { + while (peek() && peek() != '\n') + consume(); + + commit_token(Token::Type::PreprocessorStatement); } - while (peek() && peek() != '\n') - consume(); - - commit_token(Token::Type::PreprocessorStatement); continue; } if (ch == '/' && peek(1) == '/') { @@ -786,4 +787,17 @@ Vector Lexer::lex() return tokens; } +bool Position::operator<(const Position& other) const +{ + return line < other.line || (line == other.line && column < other.column); +} +bool Position::operator>(const Position& other) const +{ + return !(*this < other) && !(*this == other); +} +bool Position::operator==(const Position& other) const +{ + return line == other.line && column == other.column; +} + } diff --git a/Userland/Libraries/LibCpp/Lexer.h b/Userland/Libraries/LibCpp/Lexer.h index 351dee8aa5..3e7188f9ae 100644 --- a/Userland/Libraries/LibCpp/Lexer.h +++ b/Userland/Libraries/LibCpp/Lexer.h @@ -96,11 
+96,16 @@ namespace Cpp { __TOKEN(Float) \ __TOKEN(Keyword) \ __TOKEN(KnownType) \ - __TOKEN(Identifier) + __TOKEN(Identifier) \ + __TOKEN(EOF_TOKEN) struct Position { - size_t line; - size_t column; + size_t line { 0 }; + size_t column { 0 }; + + bool operator<(const Position&) const; + bool operator>(const Position&) const; + bool operator==(const Position&) const; }; struct Token { @@ -110,9 +115,9 @@ struct Token { #undef __TOKEN }; - const char* to_string() const + static const char* type_to_string(Type t) { - switch (m_type) { + switch (t) { #define __TOKEN(x) \ case Type::x: \ return #x; @@ -122,6 +127,14 @@ struct Token { ASSERT_NOT_REACHED(); } + const char* to_string() const + { + return type_to_string(m_type); + } + Position start() const { return m_start; } + Position end() const { return m_end; } + Type type() const { return m_type; } + Type m_type { Type::Unknown }; Position m_start; Position m_end; diff --git a/Userland/Libraries/LibCpp/Parser.cpp b/Userland/Libraries/LibCpp/Parser.cpp new file mode 100644 index 0000000000..a3447d1b61 --- /dev/null +++ b/Userland/Libraries/LibCpp/Parser.cpp @@ -0,0 +1,1000 @@ +/* + * Copyright (c) 2021, Itamar S. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// #define CPP_DEBUG + +#ifdef CPP_DEBUG +# define DEBUG_SPAM +#endif + +#include "Parser.h" +#include "AK/LogStream.h" +#include "AST.h" +#include +#include +#include + +namespace Cpp { + +Parser::Parser(const StringView& program) + : m_program(program) + , m_lines(m_program.split_view("\n", true)) +{ + Lexer lexer(m_program); + for (auto& token : lexer.lex()) { + if (token.m_type == Token::Type::Whitespace) + continue; + m_tokens.append(move(token)); + } +#ifdef CPP_DEBUG + dbgln("Program:"); + dbgln("{}", m_program); + dbgln("Tokens:"); + for (auto& token : m_tokens) { + dbgln("{}", token.to_string()); + } +#endif +} + +NonnullRefPtr Parser::parse() +{ + SCOPE_LOGGER(); + auto unit = create_root_ast_node(m_tokens.first().m_start, m_tokens.last().m_end); + while (!done()) { + if (match_comment()) { + consume(Token::Type::Comment); + continue; + } + + if (match_preprocessor()) { + consume_preprocessor(); + continue; + } + + auto declaration = match_declaration(); + if (declaration.has_value()) { + unit->append(parse_declaration(*unit, declaration.value())); + continue; + } + + error("unexpected token"); + consume(); + } + return unit; +} + +Optional Parser::match_declaration() +{ + switch (m_state.context) { + case Context::InTranslationUnit: + return match_declaration_in_translation_unit(); + case Context::InFunctionDefinition: + return match_declaration_in_function_definition(); + default: + error("unexpected context"); + 
return {}; + } +} + +NonnullRefPtr Parser::parse_declaration(ASTNode& parent, DeclarationType declaration_type) +{ + switch (declaration_type) { + case DeclarationType::Function: + return parse_function_declaration(parent); + case DeclarationType::Variable: + return parse_variable_declaration(parent); + case DeclarationType::Enum: + return parse_enum_declaration(parent); + case DeclarationType::Struct: + return parse_struct_or_class_declaration(parent, StructOrClassDeclaration::Type::Struct); + default: + error("unexpected declaration type"); + return create_ast_node(parent, position(), position()); + } +} + +NonnullRefPtr Parser::parse_function_declaration(ASTNode& parent) +{ + auto func = create_ast_node(parent, position(), {}); + + auto return_type_token = consume(Token::Type::KnownType); + auto function_name = consume(Token::Type::Identifier); + consume(Token::Type::LeftParen); + auto parameters = parse_parameter_list(*func); + consume(Token::Type::RightParen); + + RefPtr body; + Position func_end {}; + if (peek(Token::Type::LeftCurly).has_value()) { + body = parse_function_definition(*func); + func_end = body->end(); + } else { + func_end = position(); + consume(Token::Type::Semicolon); + } + + func->m_name = text_of_token(function_name); + func->m_return_type = create_ast_node(*func, return_type_token.m_start, return_type_token.m_end, text_of_token(return_type_token)); + if (parameters.has_value()) + func->m_parameters = move(parameters.value()); + func->m_definition = move(body); + func->set_end(func_end); + return func; +} + +NonnullRefPtr Parser::parse_function_definition(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto func = create_ast_node(parent, position(), {}); + consume(Token::Type::LeftCurly); + while (!eof() && peek().m_type != Token::Type::RightCurly) { + func->statements().append(parse_statement(func)); + } + func->set_end(position()); + if (!eof()) + consume(Token::Type::RightCurly); + return func; +} + +NonnullRefPtr 
Parser::parse_statement(ASTNode& parent) +{ + SCOPE_LOGGER(); + ArmedScopeGuard consume_semicolumn([this]() { + consume(Token::Type::Semicolon); + }); + + if (match_block_statement()) { + consume_semicolumn.disarm(); + return parse_block_statement(parent); + } + if (match_comment()) { + consume_semicolumn.disarm(); + return parse_comment(parent); + } + if (match_variable_declaration()) { + return parse_variable_declaration(parent); + } + if (match_expression()) { + return parse_expression(parent); + } + if (match_keyword("return")) { + return parse_return_statement(parent); + } + if (match_keyword("for")) { + consume_semicolumn.disarm(); + return parse_for_statement(parent); + } else { + error("unexpected statement type"); + consume_semicolumn.disarm(); + consume(); + return create_ast_node(parent, position(), position()); + } +} + +NonnullRefPtr Parser::parse_comment(ASTNode& parent) +{ + auto comment = create_ast_node(parent, position(), {}); + consume(Token::Type::Comment); + comment->set_end(position()); + return comment; +} + +bool Parser::match_block_statement() +{ + return peek().type() == Token::Type::LeftCurly; +} + +NonnullRefPtr Parser::parse_block_statement(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto block_statement = create_ast_node(parent, position(), {}); + consume(Token::Type::LeftCurly); + while (peek().type() != Token::Type::RightCurly) { + block_statement->m_statements.append(parse_statement(*block_statement)); + } + consume(Token::Type::RightCurly); + block_statement->set_end(position()); + return block_statement; +} + +bool Parser::match_variable_declaration() +{ + save_state(); + ScopeGuard state_guard = [this] { load_state(); }; + + if (!peek(Token::Type::KnownType).has_value() && !peek(Token::Type::Identifier).has_value()) + return false; + consume(); + + if (!peek(Token::Type::Identifier).has_value()) + return false; + consume(); + + if (match(Token::Type::Equals)) { + consume(Token::Type::Equals); + if (!match_expression()) { + 
error("initial value of variable is not an expression"); + return false; + } + } + + return true; +} + +NonnullRefPtr Parser::parse_variable_declaration(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto var = create_ast_node(parent, position(), {}); + auto type_token = consume(); + if (type_token.type() != Token::Type::KnownType && type_token.type() != Token::Type::Identifier) { + error("unexpected token for variable type"); + var->set_end(type_token.end()); + return var; + } + auto identifier_token = consume(Token::Type::Identifier); + RefPtr initial_value; + + if (match(Token::Type::Equals)) { + consume(Token::Type::Equals); + initial_value = parse_expression(var); + } + + var->set_end(position()); + var->m_type = create_ast_node(var, type_token.m_start, type_token.m_end, text_of_token(type_token)); + var->m_name = text_of_token(identifier_token); + var->m_initial_value = move(initial_value); + + return var; +} + +NonnullRefPtr Parser::parse_expression(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto expression = parse_primary_expression(parent); + // TODO: remove eof() logic, should still work without it + if (eof() || match(Token::Type::Semicolon)) { + return expression; + } + + NonnullRefPtrVector secondary_expressions; + + while (match_secondary_expression()) { + // FIXME: Handle operator precedence + expression = parse_secondary_expression(parent, expression); + secondary_expressions.append(expression); + } + + for (size_t i = 0; secondary_expressions.size() != 0 && i < secondary_expressions.size() - 1; ++i) { + secondary_expressions[i].set_parent(secondary_expressions[i + 1]); + } + + return expression; +} + +bool Parser::match_secondary_expression() +{ + auto type = peek().type(); + return type == Token::Type::Plus + || type == Token::Type::PlusEquals + || type == Token::Type::Minus + || type == Token::Type::MinusEquals + || type == Token::Type::Asterisk + || type == Token::Type::AsteriskEquals + || type == Token::Type::Percent + || type == 
Token::Type::PercentEquals + || type == Token::Type::Equals + || type == Token::Type::Greater + || type == Token::Type::Greater + || type == Token::Type::Less + || type == Token::Type::LessEquals + || type == Token::Type::Dot + || type == Token::Type::PlusPlus + || type == Token::Type::MinusMinus + || type == Token::Type::And + || type == Token::Type::AndEquals + || type == Token::Type::Pipe + || type == Token::Type::PipeEquals + || type == Token::Type::Caret + || type == Token::Type::CaretEquals + || type == Token::Type::LessLess + || type == Token::Type::LessLessEquals + || type == Token::Type::GreaterGreater + || type == Token::Type::GreaterGreaterEquals + || type == Token::Type::AndAnd + || type == Token::Type::PipePipe; +} + +NonnullRefPtr Parser::parse_primary_expression(ASTNode& parent) +{ + SCOPE_LOGGER(); + // TODO: remove eof() logic, should still work without it + if (eof()) { + auto node = create_ast_node(parent, position(), position()); + return node; + } + + if (match_unary_expression()) + return parse_unary_expression(parent); + + if (match_literal()) { + return parse_literal(parent); + } + switch (peek().type()) { + case Token::Type::Identifier: { + if (match_function_call()) + return parse_function_call(parent); + auto token = consume(); + return create_ast_node(parent, token.m_start, token.m_end, text_of_token(token)); + } + default: { + error("could not parse primary expression"); + auto token = consume(); + return create_ast_node(parent, token.m_start, token.m_end); + } + } +} + +bool Parser::match_literal() +{ + switch (peek().type()) { + case Token::Type::Integer: + return true; + case Token::Type::DoubleQuotedString: + return true; + case Token::Type::Keyword: { + return match_boolean_literal(); + } + default: + return false; + } +} + +bool Parser::match_unary_expression() +{ + auto type = peek().type(); + return type == Token::Type::PlusPlus + || type == Token::Type::MinusMinus + || type == Token::Type::ExclamationMark + || type == 
Token::Type::Tilde + || type == Token::Type::Plus + || type == Token::Type::Minus; +} + +NonnullRefPtr Parser::parse_unary_expression(ASTNode& parent) +{ + auto unary_exp = create_ast_node(parent, position(), {}); + auto op_token = consume(); + UnaryOp op { UnaryOp::Invalid }; + switch (op_token.type()) { + case Token::Type::Minus: + op = UnaryOp::Minus; + break; + case Token::Type::Plus: + op = UnaryOp::Plus; + break; + case Token::Type::ExclamationMark: + op = UnaryOp::Not; + break; + case Token::Type::Tilde: + op = UnaryOp::BitwiseNot; + break; + case Token::Type::PlusPlus: + op = UnaryOp::PlusPlus; + break; + default: + break; + } + unary_exp->m_op = op; + auto lhs = parse_expression(*unary_exp); + unary_exp->m_lhs = lhs; + unary_exp->set_end(lhs->end()); + return unary_exp; +} + +NonnullRefPtr Parser::parse_literal(ASTNode& parent) +{ + switch (peek().type()) { + case Token::Type::Integer: { + auto token = consume(); + return create_ast_node(parent, token.m_start, token.m_end, text_of_token(token)); + } + case Token::Type::DoubleQuotedString: { + return parse_string_literal(parent); + } + case Token::Type::Keyword: { + if (match_boolean_literal()) + return parse_boolean_literal(parent); + [[fallthrough]]; + } + default: { + error("could not parse literal"); + auto token = consume(); + return create_ast_node(parent, token.m_start, token.m_end); + } + } +} + +NonnullRefPtr Parser::parse_secondary_expression(ASTNode& parent, NonnullRefPtr lhs) +{ + SCOPE_LOGGER(); + switch (peek().m_type) { + case Token::Type::Plus: + return parse_binary_expression(parent, lhs, BinaryOp::Addition); + case Token::Type::Less: + return parse_binary_expression(parent, lhs, BinaryOp::LessThan); + case Token::Type::Equals: + return parse_assignment_expression(parent, lhs, AssignmentOp::Assignment); + case Token::Type::Dot: { + consume(); + auto exp = create_ast_node(parent, lhs->start(), {}); + lhs->set_parent(*exp); + exp->m_object = move(lhs); + auto property_token = 
consume(Token::Type::Identifier); + exp->m_property = create_ast_node(*exp, property_token.start(), property_token.end(), text_of_token(property_token)); + exp->set_end(property_token.end()); + return exp; + } + default: { + error(String::formatted("unexpected operator for expression. operator: {}", peek().to_string())); + auto token = consume(); + return create_ast_node(parent, token.start(), token.end()); + } + } +} + +NonnullRefPtr Parser::parse_binary_expression(ASTNode& parent, NonnullRefPtr lhs, BinaryOp op) +{ + consume(); // Operator + auto exp = create_ast_node(parent, lhs->start(), {}); + lhs->set_parent(*exp); + exp->m_op = op; + exp->m_lhs = move(lhs); + auto rhs = parse_expression(exp); + exp->set_end(rhs->end()); + exp->m_rhs = move(rhs); + return exp; +} + +NonnullRefPtr Parser::parse_assignment_expression(ASTNode& parent, NonnullRefPtr lhs, AssignmentOp op) +{ + consume(); // Operator + auto exp = create_ast_node(parent, lhs->start(), {}); + lhs->set_parent(*exp); + exp->m_op = op; + exp->m_lhs = move(lhs); + auto rhs = parse_expression(exp); + exp->set_end(rhs->end()); + exp->m_rhs = move(rhs); + return exp; +} + +Optional Parser::match_declaration_in_translation_unit() +{ + if (match_function_declaration()) + return DeclarationType::Function; + if (match_enum_declaration()) + return DeclarationType::Enum; + if (match_struct_declaration()) + return DeclarationType::Struct; + return {}; +} + +bool Parser::match_enum_declaration() +{ + return peek().type() == Token::Type::Keyword && text_of_token(peek()) == "enum"; +} + +bool Parser::match_struct_declaration() +{ + return peek().type() == Token::Type::Keyword && text_of_token(peek()) == "struct"; +} + +bool Parser::match_function_declaration() +{ + save_state(); + ScopeGuard state_guard = [this] { load_state(); }; + + if (!peek(Token::Type::KnownType).has_value()) + return false; + consume(); + + if (!peek(Token::Type::Identifier).has_value()) + return false; + consume(); + + if 
(!peek(Token::Type::LeftParen).has_value()) + return false; + consume(); + + while (consume().m_type != Token::Type::RightParen && !eof()) { }; + + if (peek(Token::Type::Semicolon).has_value() || peek(Token::Type::LeftCurly).has_value()) + return true; + + return false; +} + +Optional> Parser::parse_parameter_list(ASTNode& parent) +{ + SCOPE_LOGGER(); + NonnullRefPtrVector parameters; + while (peek().m_type != Token::Type::RightParen && !eof()) { + auto type = parse_type(parent); + + auto name_identifier = peek(Token::Type::Identifier); + if (name_identifier.has_value()) + consume(Token::Type::Identifier); + + StringView name; + if (name_identifier.has_value()) + name = text_of_token(name_identifier.value()); + + auto param = create_ast_node(parent, type->start(), name_identifier.has_value() ? name_identifier.value().m_end : type->end(), name); + + param->m_type = move(type); + parameters.append(move(param)); + if (peek(Token::Type::Comma).has_value()) + consume(Token::Type::Comma); + } + return parameters; +} + +bool Parser::match_comment() +{ + return match(Token::Type::Comment); +} + +bool Parser::match_whitespace() +{ + return match(Token::Type::Whitespace); +} + +bool Parser::match_preprocessor() +{ + return match(Token::Type::PreprocessorStatement) || match(Token::Type::IncludeStatement); +} + +void Parser::consume_preprocessor() +{ + SCOPE_LOGGER(); + switch (peek().type()) { + case Token::Type::PreprocessorStatement: + consume(); + break; + case Token::Type::IncludeStatement: + consume(); + consume(Token::Type::IncludePath); + break; + default: + error("unexpected token while parsing preprocessor statement"); + consume(); + } +} + +Optional Parser::consume_whitespace() +{ + SCOPE_LOGGER(); + return consume(Token::Type::Whitespace); +} + +Token Parser::consume(Token::Type type) +{ + auto token = consume(); + if (token.type() != type) + error(String::formatted("expected {} at {}:{}, found: {}", Token::type_to_string(type), token.start().line, 
token.start().column, Token::type_to_string(token.type()))); + return token; +} + +bool Parser::match(Token::Type type) +{ + return peek().m_type == type; +} + +Token Parser::consume() +{ + if (eof()) { + error("C++ Parser: out of tokens"); + return { Token::Type::EOF_TOKEN, position(), position() }; + } + return m_tokens[m_state.token_index++]; +} + +Token Parser::peek() const +{ + if (eof()) { + return { Token::Type::EOF_TOKEN, position(), position() }; + } + return m_tokens[m_state.token_index]; +} + +Optional Parser::peek(Token::Type type) const +{ + auto token = peek(); + if (token.m_type == type) + return token; + return {}; +} + +void Parser::save_state() +{ + m_saved_states.append(m_state); +} + +void Parser::load_state() +{ + m_state = m_saved_states.take_last(); +} + +Optional Parser::match_declaration_in_function_definition() +{ + ASSERT_NOT_REACHED(); +} + +bool Parser::done() +{ + return m_state.token_index == m_tokens.size(); +} + +StringView Parser::text_of_token(const Cpp::Token& token) +{ + ASSERT(token.m_start.line == token.m_end.line); + ASSERT(token.m_start.column <= token.m_end.column); + return m_lines[token.m_start.line].substring_view(token.m_start.column, token.m_end.column - token.m_start.column + 1); +} + +StringView Parser::text_of_node(const ASTNode& node) const +{ + if (node.start().line == node.end().line) { + ASSERT(node.start().column <= node.end().column); + return m_lines[node.start().line].substring_view(node.start().column, node.end().column - node.start().column + 1); + } + + auto index_of_position([this](auto position) { + size_t start_index = 0; + for (size_t line = 0; line < position.line; ++line) { + start_index += m_lines[line].length() + 1; + } + start_index += position.column; + return start_index; + }); + auto start_index = index_of_position(node.start()); + auto end_index = index_of_position(node.end()); + ASSERT(end_index >= start_index); + return m_program.substring_view(start_index, end_index - start_index); +} + 
+void Parser::error(StringView message) +{ + SCOPE_LOGGER(); + if (message.is_null() || message.is_empty()) + message = ""; + String formatted_message; + if (m_state.token_index >= m_tokens.size()) { + formatted_message = String::formatted("C++ Parsed error on EOF.{}", message); + } else { + formatted_message = String::formatted("C++ Parser error: {}. token: {} ({}:{})", + message, + m_state.token_index < m_tokens.size() ? text_of_token(m_tokens[m_state.token_index]) : "EOF", + m_tokens[m_state.token_index].m_start.line, + m_tokens[m_state.token_index].m_start.column); + } + m_errors.append(formatted_message); + dbgln("{}", formatted_message); +} + +bool Parser::match_expression() +{ + auto token_type = peek().m_type; + return token_type == Token::Type::Integer + || token_type == Token::Type::Float + || token_type == Token::Type::Identifier + || match_unary_expression(); +} + +bool Parser::eof() const +{ + return m_state.token_index >= m_tokens.size(); +} + +Position Parser::position() const +{ + if (eof()) + return m_tokens.last().m_end; + return peek().m_start; +} + +RefPtr Parser::eof_node() const +{ + ASSERT(m_tokens.size()); + return node_at(m_tokens.last().m_end); +} + +RefPtr Parser::node_at(Position pos) const +{ + ASSERT(!m_tokens.is_empty()); + RefPtr match_node; + for (auto& node : m_nodes) { + if (node.start() > pos || node.end() < pos) + continue; + if (!match_node) + match_node = node; + else if (node_span_size(node) < node_span_size(*match_node)) + match_node = node; + } + return match_node; +} + +Optional Parser::token_at(Position pos) const +{ + for (auto& token : m_tokens) { + if (token.start() > pos || token.end() < pos) + continue; + return token; + } + return {}; +} + +size_t Parser::node_span_size(const ASTNode& node) const +{ + if (node.start().line == node.end().line) + return node.end().column - node.start().column; + + size_t span_size = m_lines[node.start().line].length() - node.start().column; + for (size_t line = node.start().line + 1; 
line < node.end().line; ++line) { + span_size += m_lines[line].length(); + } + return span_size + node.end().column; /* the last line contributes the columns before the node's end, not the ones after it */ +} + +void Parser::print_tokens() const +{ + for (auto& token : m_tokens) { + dbgln("{}", token.to_string()); + } +} + +bool Parser::match_function_call() +{ + save_state(); + ScopeGuard state_guard = [this] { load_state(); }; + if (!match(Token::Type::Identifier)) + return false; + consume(); + return match(Token::Type::LeftParen); +} + +NonnullRefPtr Parser::parse_function_call(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto call = create_ast_node(parent, position(), {}); + auto name_identifier = consume(Token::Type::Identifier); + call->m_name = text_of_token(name_identifier); + + NonnullRefPtrVector args; + consume(Token::Type::LeftParen); + while (peek().type() != Token::Type::RightParen && !eof()) { + args.append(parse_expression(*call)); + if (peek().type() == Token::Type::Comma) + consume(Token::Type::Comma); + } + consume(Token::Type::RightParen); + call->m_arguments = move(args); + call->set_end(position()); + return call; +} + +NonnullRefPtr Parser::parse_string_literal(ASTNode& parent) +{ + SCOPE_LOGGER(); + Optional start_token_index; + Optional end_token_index; + while (!eof()) { + auto token = peek(); + if (token.type() != Token::Type::DoubleQuotedString && token.type() != Token::Type::EscapeSequence) { + ASSERT(start_token_index.has_value()); + // TODO: don't consume + end_token_index = m_state.token_index - 1; + break; + } + if (!start_token_index.has_value()) + start_token_index = m_state.token_index; + consume(); + } + ASSERT(start_token_index.has_value()); + if (!end_token_index.has_value()) end_token_index = m_state.token_index - 1; /* the literal ran up to EOF, so the last consumed token ends it (previously an assertion failure) */ + Token start_token = m_tokens[start_token_index.value()]; + Token end_token = m_tokens[end_token_index.value()]; + ASSERT(start_token.start().line == end_token.start().line); + + auto text = m_lines[start_token.start().line].substring_view(start_token.start().column, end_token.end().column - 
start_token.start().column + 1); + auto string_literal = create_ast_node(parent, start_token.start(), end_token.end()); + string_literal->m_value = text; + return string_literal; +} + +NonnullRefPtr Parser::parse_return_statement(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto return_statement = create_ast_node(parent, position(), {}); + consume(Token::Type::Keyword); + auto expression = parse_expression(*return_statement); + return_statement->m_value = expression; + return_statement->set_end(expression->end()); + return return_statement; +} + +NonnullRefPtr Parser::parse_enum_declaration(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto enum_decl = create_ast_node(parent, position(), {}); + consume_keyword("enum"); + auto name_token = consume(Token::Type::Identifier); + enum_decl->m_name = text_of_token(name_token); + consume(Token::Type::LeftCurly); + while (peek().type() != Token::Type::RightCurly && !eof()) { + enum_decl->m_entries.append(text_of_token(consume(Token::Type::Identifier))); + if (peek().type() != Token::Type::Comma) { + break; + } + consume(Token::Type::Comma); + } + consume(Token::Type::RightCurly); + consume(Token::Type::Semicolon); + enum_decl->set_end(position()); + return enum_decl; +} + +Token Parser::consume_keyword(const String& keyword) +{ + auto token = consume(); + if (token.type() != Token::Type::Keyword) { + error(String::formatted("unexpected token: {}, expected Keyword", token.to_string())); + return token; + } + if (text_of_token(token) != keyword) { + error(String::formatted("unexpected keyword: {}, expected {}", text_of_token(token), keyword)); + return token; + } + return token; +} + +bool Parser::match_keyword(const String& keyword) +{ + auto token = peek(); + if (token.type() != Token::Type::Keyword) { + return false; + } + if (text_of_token(token) != keyword) { + return false; + } + return true; +} + +NonnullRefPtr Parser::parse_struct_or_class_declaration(ASTNode& parent, StructOrClassDeclaration::Type type) +{ + SCOPE_LOGGER(); + 
auto decl = create_ast_node(parent, position(), {}, type); + switch (type) { + case StructOrClassDeclaration::Type::Struct: + consume_keyword("struct"); + break; + case StructOrClassDeclaration::Type::Class: + consume_keyword("class"); + break; + } + auto name_token = consume(Token::Type::Identifier); + decl->m_name = text_of_token(name_token); + + consume(Token::Type::LeftCurly); + + while (peek().type() != Token::Type::RightCurly && !eof()) { + decl->m_members.append(parse_member_declaration(*decl)); + } + + consume(Token::Type::RightCurly); + consume(Token::Type::Semicolon); + decl->set_end(position()); + return decl; +} + +NonnullRefPtr Parser::parse_member_declaration(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto member_decl = create_ast_node(parent, position(), {}); + auto type_token = consume(); + auto identifier_token = consume(Token::Type::Identifier); + RefPtr initial_value; + + if (match(Token::Type::LeftCurly)) { + consume(Token::Type::LeftCurly); + initial_value = parse_expression(*member_decl); + consume(Token::Type::RightCurly); + } + + member_decl->m_type = create_ast_node(*member_decl, type_token.m_start, type_token.m_end, text_of_token(type_token)); + member_decl->m_name = text_of_token(identifier_token); + member_decl->m_initial_value = move(initial_value); + consume(Token::Type::Semicolon); + member_decl->set_end(position()); + + return member_decl; +} + +NonnullRefPtr Parser::parse_boolean_literal(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto token = consume(Token::Type::Keyword); + auto text = text_of_token(token); + // text == "true" || text == "false"; + bool value = (text == "true"); + return create_ast_node(parent, token.start(), token.end(), value); +} + +bool Parser::match_boolean_literal() +{ + auto token = peek(); + if (token.type() != Token::Type::Keyword) + return false; + auto text = text_of_token(token); + return text == "true" || text == "false"; +} + +NonnullRefPtr Parser::parse_type(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto 
token = consume(); + auto type = create_ast_node(parent, token.start(), token.end(), text_of_token(token)); + if (token.type() != Token::Type::KnownType && token.type() != Token::Type::Identifier) { + error(String::formatted("unexpected token for type: {}", token.to_string())); + return type; + } + while (peek().type() == Token::Type::Asterisk) { + auto asterisk = consume(); + auto ptr = create_ast_node(type, asterisk.start(), asterisk.end()); + ptr->m_pointee = type; + type = ptr; + } + return type; +} + +NonnullRefPtr Parser::parse_for_statement(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto for_statement = create_ast_node(parent, position(), {}); + consume(Token::Type::Keyword); + consume(Token::Type::LeftParen); + for_statement->m_init = parse_variable_declaration(*for_statement); + consume(Token::Type::Semicolon); + for_statement->m_test = parse_expression(*for_statement); + consume(Token::Type::Semicolon); + for_statement->m_update = parse_expression(*for_statement); + consume(Token::Type::RightParen); + for_statement->m_body = parse_statement(*for_statement); + for_statement->set_end(for_statement->m_body->end()); + return for_statement; +} + +} diff --git a/Userland/Libraries/LibCpp/Parser.h b/Userland/Libraries/LibCpp/Parser.h new file mode 100644 index 0000000000..013d1ff697 --- /dev/null +++ b/Userland/Libraries/LibCpp/Parser.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2021, Itamar S. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "AK/NonnullRefPtr.h" +#include "AST.h" +#include + +namespace Cpp { + +/* Parser constructed from a StringView of program text. Public surface: parse(), eof(), lookups (eof_node, node_at, token_at), root_node() which returns m_root_node, text_of_node(), print_tokens(), and errors() which returns m_errors by value. */ class Parser final { +public: + explicit Parser(const StringView&); + ~Parser() = default; + + NonnullRefPtr parse(); + bool eof() const; + + RefPtr eof_node() const; + RefPtr node_at(Position) const; + Optional token_at(Position) const; + RefPtr root_node() const { return m_root_node; } + StringView text_of_node(const ASTNode&) const; + void print_tokens() const; + Vector errors() const { return m_errors; } + +private: + /* Declaration kinds; a value of this type is passed to parse_declaration. */ enum class DeclarationType { + Function, + Variable, + Enum, + Struct, + }; + + bool done(); + + /* match_* helpers. NOTE(review): presumably non-consuming lookahead; their definitions are not part of this header - confirm against the implementation. */ Optional match_declaration(); + Optional match_declaration_in_translation_unit(); + Optional match_declaration_in_function_definition(); + bool match_function_declaration(); + bool match_comment(); + bool match_preprocessor(); + bool match_whitespace(); + bool match_variable_declaration(); + bool match_expression(); + bool match_function_call(); + bool match_secondary_expression(); + bool match_enum_declaration(); + bool match_struct_declaration(); + bool match_literal(); + bool match_unary_expression(); + bool match_boolean_literal(); + bool match_keyword(const
String&); + bool match_block_statement(); + + /* parse_* and consume_* helpers. Every parse_* declared below takes the parent ASTNode as its first parameter. */ Optional> parse_parameter_list(ASTNode& parent); + Optional consume_whitespace(); + void consume_preprocessor(); + + NonnullRefPtr parse_declaration(ASTNode& parent, DeclarationType); + NonnullRefPtr parse_function_declaration(ASTNode& parent); + NonnullRefPtr parse_function_definition(ASTNode& parent); + NonnullRefPtr parse_statement(ASTNode& parent); + NonnullRefPtr parse_variable_declaration(ASTNode& parent); + NonnullRefPtr parse_expression(ASTNode& parent); + NonnullRefPtr parse_primary_expression(ASTNode& parent); + NonnullRefPtr parse_secondary_expression(ASTNode& parent, NonnullRefPtr lhs); + NonnullRefPtr parse_function_call(ASTNode& parent); + NonnullRefPtr parse_string_literal(ASTNode& parent); + NonnullRefPtr parse_return_statement(ASTNode& parent); + NonnullRefPtr parse_enum_declaration(ASTNode& parent); + NonnullRefPtr parse_struct_or_class_declaration(ASTNode& parent, StructOrClassDeclaration::Type); + NonnullRefPtr parse_member_declaration(ASTNode& parent); + NonnullRefPtr parse_literal(ASTNode& parent); + NonnullRefPtr parse_unary_expression(ASTNode& parent); + NonnullRefPtr parse_boolean_literal(ASTNode& parent); + NonnullRefPtr parse_type(ASTNode& parent); + NonnullRefPtr parse_binary_expression(ASTNode& parent, NonnullRefPtr lhs, BinaryOp); + NonnullRefPtr parse_assignment_expression(ASTNode& parent, NonnullRefPtr lhs, AssignmentOp); + NonnullRefPtr parse_for_statement(ASTNode& parent); + NonnullRefPtr parse_block_statement(ASTNode& parent); + NonnullRefPtr parse_comment(ASTNode& parent); + + /* Token-level primitives: test, consume and peek at tokens, and report the current position. */ bool match(Token::Type); + Token consume(Token::Type); + Token consume(); + Token consume_keyword(const String&); + Token peek() const; + Optional peek(Token::Type) const; + Position position() const; + + /* NOTE(review): presumably snapshot and restore m_state through m_saved_states; the definitions are not in this header - confirm. */ void save_state(); + void load_state(); + + /* Parsing context; stored in State::context, which defaults to InTranslationUnit. */ enum class Context { + InTranslationUnit, + InFunctionDefinition, + }; + + /* Mutable parser state: the current Context plus token_index, which starts at 0. */ struct State { + Context context { Context::InTranslationUnit }; + size_t token_index { 0
}; + }; + + StringView text_of_token(const Cpp::Token& token); + void error(StringView message = {}); + + size_t node_span_size(const ASTNode& node) const; + + /* Allocates a T with new, constructed from (&parent, start, end, forwarded args...), wraps it with adopt(), appends it to m_nodes and returns it. */ template + NonnullRefPtr + create_ast_node(ASTNode& parent, const Position& start, Optional end, Args&&... args) + { + auto node = adopt(*new T(&parent, start, end, forward(args)...)); + m_nodes.append(node); + return node; + } + + /* Creates the TranslationUnit with a null parent, appends it to m_nodes, stores it in m_root_node and returns it. */ NonnullRefPtr + create_root_ast_node(const Position& start, Position end) + { + auto node = adopt(*new TranslationUnit(nullptr, start, end)); + m_nodes.append(node); + m_root_node = node; + return node; + } + + /* Data members. m_nodes receives every node made by create_ast_node and create_root_ast_node; m_root_node is set by create_root_ast_node; m_errors is what errors() returns. */ StringView m_program; + Vector m_lines; + Vector m_tokens; + State m_state; + Vector m_saved_states; + RefPtr m_root_node; + NonnullRefPtrVector m_nodes; + Vector m_errors; +}; + +} diff --git a/Userland/Utilities/CMakeLists.txt b/Userland/Utilities/CMakeLists.txt index 9352734b1b..cea432a38d 100644 --- a/Userland/Utilities/CMakeLists.txt +++ b/Userland/Utilities/CMakeLists.txt @@ -47,3 +47,4 @@ target_link_libraries(test-web LibWeb) target_link_libraries(tt LibPthread) target_link_libraries(grep LibRegex) target_link_libraries(gunzip LibCompress) +target_link_libraries(CppParserTest LibCpp) diff --git a/Userland/Utilities/CppParserTest.cpp b/Userland/Utilities/CppParserTest.cpp new file mode 100644 index 0000000000..2fd78329aa --- /dev/null +++ b/Userland/Utilities/CppParserTest.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "AK/Forward.h" +#include "LibCpp/AST.h" +#include +#include +#include + +/* Command-line driver for Cpp::Parser. Takes an optional positional file path and a tokens option (short form T). Reads the whole file and hands its contents to the parser. In tokens mode only print_tokens() runs and main returns 0; otherwise parse() runs, every entry of parser.errors() is logged with dbgln, and the tree is dumped with root->dump(0). */ int main(int argc, char** argv) +{ + Core::ArgsParser args_parser; + const char* path = nullptr; + bool tokens_mode = false; + args_parser.add_option(tokens_mode, "Print Tokens", "tokens", 'T'); + args_parser.add_positional_argument(path, "Cpp File", "cpp-file", Core::ArgsParser::Required::No); + args_parser.parse(argc, argv); + + /* The positional argument is optional; fall back to a fixed relative path when it is absent. */ if (!path) + path = "Source/little/main.cpp"; + auto file = Core::File::construct(path); + /* On open failure: report through perror and exit with status 1. */ if (!file->open(Core::IODevice::ReadOnly)) { + perror("open"); + exit(1); + } + auto content = file->read_all(); + /* The parser is given a view over content, which stays alive until main returns. */ StringView content_view(content); + ::Cpp::Parser parser(content_view); + if (tokens_mode) { + parser.print_tokens(); + return 0; + } + auto root = parser.parse(); + + /* The header line is logged even when the error list is empty. */ dbgln("Parser errors:"); + for (auto& error : parser.errors()) { + dbgln("{}", error); + } + + root->dump(0); +}