From f3a9eba987a8346dc2c13cedac3865eee38a10a4 Mon Sep 17 00:00:00 2001 From: Stephan Unverwerth Date: Wed, 11 Mar 2020 19:27:43 +0100 Subject: [PATCH] LibJS: Add Javascript lexer and parser This adds a basic Javascript lexer and parser. It can parse the currently existing demo programs. More work needs to be done to turn it into a complete parser than can parse arbitrary JS Code. The lexer outputs tokens with preceeding whitespace and comments in the trivia member. This should allow us to generate the exact source code by concatenating the generated tokens. The parser is written in a way that it always returns a complete syntax tree. Error conditions are represented as nodes in the tree. This simplifies the code and allows it to be used as an early stage parser, e.g for parsing JS documents in an IDE while editing the source code.: --- Libraries/LibJS/AST.cpp | 16 +- Libraries/LibJS/AST.h | 56 +++++-- Libraries/LibJS/Forward.h | 1 + Libraries/LibJS/Lexer.cpp | 234 ++++++++++++++++++++++++++++++ Libraries/LibJS/Lexer.h | 61 ++++++++ Libraries/LibJS/Makefile | 3 + Libraries/LibJS/Parser.cpp | 289 +++++++++++++++++++++++++++++++++++++ Libraries/LibJS/Parser.h | 68 +++++++++ Libraries/LibJS/Token.cpp | 182 +++++++++++++++++++++++ Libraries/LibJS/Token.h | 122 ++++++++++++++++ Userland/js.cpp | 73 +++++++--- 11 files changed, 1069 insertions(+), 36 deletions(-) create mode 100644 Libraries/LibJS/Lexer.cpp create mode 100644 Libraries/LibJS/Lexer.h create mode 100644 Libraries/LibJS/Parser.cpp create mode 100644 Libraries/LibJS/Parser.h create mode 100644 Libraries/LibJS/Token.cpp create mode 100644 Libraries/LibJS/Token.h diff --git a/Libraries/LibJS/AST.cpp b/Libraries/LibJS/AST.cpp index 3cb34f15bc..9ff915d931 100644 --- a/Libraries/LibJS/AST.cpp +++ b/Libraries/LibJS/AST.cpp @@ -44,6 +44,11 @@ Value FunctionDeclaration::execute(Interpreter& interpreter) const return Value(function); } +Value ExpressionStatement::execute(Interpreter& interpreter) const +{ + return m_expression->execute(interpreter); +} + Value CallExpression::execute(Interpreter& interpreter) const { if (name() == "$gc") { @@ -61,7 +66,7 @@ Value CallExpression::execute(Interpreter& interpreter) const Value ReturnStatement::execute(Interpreter& interpreter) const { - auto value = argument().execute(interpreter); + auto value = argument() ? argument()->execute(interpreter) : js_undefined(); interpreter.do_return(); return value; } @@ -310,7 +315,8 @@ void FunctionDeclaration::dump(int indent) const void ReturnStatement::dump(int indent) const { ASTNode::dump(indent); - argument().dump(indent + 1); + if (argument()) + argument()->dump(indent + 1); } void IfStatement::dump(int indent) const @@ -413,6 +419,12 @@ void ObjectExpression::dump(int indent) const ASTNode::dump(indent); } +void ExpressionStatement::dump(int indent) const +{ + ASTNode::dump(indent); + m_expression->dump(indent + 1); +} + Value ObjectExpression::execute(Interpreter& interpreter) const { return Value(interpreter.heap().allocate()); diff --git a/Libraries/LibJS/AST.h b/Libraries/LibJS/AST.h index e61285d9cd..97d318dbb6 100644 --- a/Libraries/LibJS/AST.h +++ b/Libraries/LibJS/AST.h @@ -48,7 +48,31 @@ protected: private: }; -class ScopeNode : public ASTNode { +class Statement : public ASTNode { +}; + +class ErrorStatement final : public Statement { +public: + Value execute(Interpreter&) const { return js_undefined(); } + const char* class_name() const override { return "ErrorStatement"; } +}; + +class ExpressionStatement final : public Statement { +public: + ExpressionStatement(NonnullOwnPtr expression) + : m_expression(move(expression)) + { + } + + Value execute(Interpreter&) const override; + const char* class_name() const override { return "ExpressionStatement"; } + virtual void dump(int indent) const override; + +private: + NonnullOwnPtr m_expression; +}; + +class ScopeNode : public Statement { public: template T& append(Args&&... args) @@ -57,8 +81,12 @@ public: m_children.append(move(child)); return static_cast(m_children.last()); } + void append(NonnullOwnPtr child) + { + m_children.append(move(child)); + } - const NonnullOwnPtrVector& children() const { return m_children; } + const NonnullOwnPtrVector& children() const { return m_children; } virtual Value execute(Interpreter&) const override; virtual void dump(int indent) const override; @@ -66,7 +94,7 @@ protected: ScopeNode() {} private: - NonnullOwnPtrVector m_children; + NonnullOwnPtrVector m_children; }; class Program : public ScopeNode { @@ -85,7 +113,7 @@ private: virtual const char* class_name() const override { return "BlockStatement"; } }; -class FunctionDeclaration : public ASTNode { +class FunctionDeclaration : public Statement { public: FunctionDeclaration(String name, NonnullOwnPtr body) : m_name(move(name)) @@ -110,14 +138,20 @@ class Expression : public ASTNode { public: }; -class ReturnStatement : public ASTNode { +class ErrorExpression final : public Expression { public: - explicit ReturnStatement(NonnullOwnPtr argument) + Value execute(Interpreter&) const { return js_undefined(); } + const char* class_name() const override { return "ErrorExpression"; } +}; + +class ReturnStatement : public Statement { +public: + explicit ReturnStatement(OwnPtr argument) : m_argument(move(argument)) { } - const Expression& argument() const { return *m_argument; } + const Expression* argument() const { return m_argument; } virtual Value execute(Interpreter&) const override; virtual void dump(int indent) const override; @@ -125,10 +159,10 @@ public: private: virtual const char* class_name() const override { return "ReturnStatement"; } - NonnullOwnPtr m_argument; + OwnPtr m_argument; }; -class IfStatement : public ASTNode { +class IfStatement : public Statement { public: IfStatement(NonnullOwnPtr predicate, NonnullOwnPtr consequent, NonnullOwnPtr alternate) : m_predicate(move(predicate)) @@ -152,7 +186,7 @@ private: NonnullOwnPtr m_alternate; }; -class WhileStatement : public ASTNode { +class WhileStatement : public Statement { public: WhileStatement(NonnullOwnPtr predicate, NonnullOwnPtr body) : m_predicate(move(predicate)) @@ -337,7 +371,7 @@ enum class DeclarationType { Let, }; -class VariableDeclaration : public ASTNode { +class VariableDeclaration : public Statement { public: VariableDeclaration(NonnullOwnPtr name, OwnPtr initializer, DeclarationType declaration_type) : m_declaration_type(declaration_type) diff --git a/Libraries/LibJS/Forward.h b/Libraries/LibJS/Forward.h index 0af451f63e..4f38acf973 100644 --- a/Libraries/LibJS/Forward.h +++ b/Libraries/LibJS/Forward.h @@ -30,6 +30,7 @@ namespace JS { class ASTNode; class Cell; +class Expression; class Heap; class HeapBlock; class Interpreter; diff --git a/Libraries/LibJS/Lexer.cpp b/Libraries/LibJS/Lexer.cpp new file mode 100644 index 0000000000..979764e3b5 --- /dev/null +++ b/Libraries/LibJS/Lexer.cpp @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Lexer.h" +#include +#include +#include +#include + +namespace JS { + +HashMap Lexer::s_keywords; +HashMap Lexer::s_two_char_tokens; +HashMap Lexer::s_single_char_tokens; + +Lexer::Lexer(StringView source) + : m_source(source) + , m_current_token(TokenType::Eof, StringView(nullptr), StringView(nullptr)) +{ + if (s_keywords.is_empty()) { + s_keywords.set("true", TokenType::BoolLiteral); + s_keywords.set("false", TokenType::BoolLiteral); + s_keywords.set("catch", TokenType::Catch); + s_keywords.set("class", TokenType::Class); + s_keywords.set("const", TokenType::Const); + s_keywords.set("delete", TokenType::Delete); + s_keywords.set("do", TokenType::Do); + s_keywords.set("else", TokenType::Else); + s_keywords.set("finally", TokenType::Finally); + s_keywords.set("function", TokenType::Function); + s_keywords.set("if", TokenType::If); + s_keywords.set("interface", TokenType::Interface); + s_keywords.set("let", TokenType::Let); + s_keywords.set("new", TokenType::New); + s_keywords.set("null", TokenType::NullLiteral); + s_keywords.set("return", TokenType::Return); + s_keywords.set("try", TokenType::Try); + s_keywords.set("var", TokenType::Var); + s_keywords.set("while", TokenType::While); + } + + if (s_two_char_tokens.is_empty()) { + s_two_char_tokens.set("+=", TokenType::PlusEquals); + s_two_char_tokens.set("-=", TokenType::MinusEquals); + s_two_char_tokens.set("*=", TokenType::AsteriskEquals); + s_two_char_tokens.set("/=", TokenType::SlashEquals); + s_two_char_tokens.set("%=", TokenType::PercentEquals); + s_two_char_tokens.set("&=", TokenType::AmpersandEquals); + s_two_char_tokens.set("|=", TokenType::PipeEquals); + s_two_char_tokens.set("&&", TokenType::DoubleAmpersand); + s_two_char_tokens.set("||", TokenType::DoublePipe); + s_two_char_tokens.set("==", TokenType::EqualsEquals); + s_two_char_tokens.set("!=", TokenType::ExclamationMarkEquals); + s_two_char_tokens.set("--", TokenType::MinusMinus); + s_two_char_tokens.set("++", TokenType::PlusPlus); + s_two_char_tokens.set("<<", TokenType::ShiftLeft); + s_two_char_tokens.set(">>", TokenType::ShiftRight); + } + + if (s_single_char_tokens.is_empty()) { + s_single_char_tokens.set('&', TokenType::Ampersand); + s_single_char_tokens.set('*', TokenType::Asterisk); + s_single_char_tokens.set('[', TokenType::BracketOpen); + s_single_char_tokens.set(']', TokenType::BracketClose); + s_single_char_tokens.set(',', TokenType::Comma); + s_single_char_tokens.set('{', TokenType::CurlyOpen); + s_single_char_tokens.set('}', TokenType::CurlyClose); + s_single_char_tokens.set('=', TokenType::Equals); + s_single_char_tokens.set('!', TokenType::ExclamationMark); + s_single_char_tokens.set('-', TokenType::Minus); + s_single_char_tokens.set('(', TokenType::ParenOpen); + s_single_char_tokens.set(')', TokenType::ParenClose); + s_single_char_tokens.set('%', TokenType::Percent); + s_single_char_tokens.set('.', TokenType::Period); + s_single_char_tokens.set('|', TokenType::Pipe); + s_single_char_tokens.set('+', TokenType::Plus); + s_single_char_tokens.set('?', TokenType::QuestionMark); + s_single_char_tokens.set(';', TokenType::Semicolon); + s_single_char_tokens.set('/', TokenType::Slash); + s_single_char_tokens.set('<', TokenType::LessThan); + s_single_char_tokens.set('>', TokenType::GreaterThan); + } + consume(); +} + +void Lexer::consume() +{ + if (is_eof()) { + m_current_char = EOF; + return; + } + + m_current_char = m_source[m_position++]; +} + +bool Lexer::is_eof() const +{ + return m_position >= m_source.length(); +} + +bool Lexer::is_identifier_start() const +{ + return isalpha(m_current_char) || m_current_char == '_' || m_current_char == '$'; +} + +bool Lexer::is_identifier_middle() const +{ + return is_identifier_start() || isdigit(m_current_char); +} + +bool Lexer::is_line_comment_start() const +{ + return m_current_char == '/' && m_position < m_source.length() && m_source[m_position] == '/'; +} + +bool Lexer::is_block_comment_start() const +{ + return m_current_char == '/' && m_position < m_source.length() && m_source[m_position] == '*'; +} + +bool Lexer::is_block_comment_end() const +{ + return m_current_char == '*' && m_position < m_source.length() && m_source[m_position] == '/'; +} + +Token Lexer::next() +{ + size_t trivia_start = m_position; + + // consume up whitespace and comments + while (true) { + if (isspace(m_current_char)) { + do { + consume(); + } while (!is_eof() && isspace(m_current_char)); + } else if (is_line_comment_start()) { + consume(); + do { + consume(); + } while (!is_eof() && m_current_char != '\n'); + } else if (is_block_comment_start()) { + consume(); + do { + consume(); + } while (!is_eof() && !is_block_comment_end()); + consume(); // consume * + consume(); // consume / + } else { + break; + } + } + + size_t value_start = m_position; + TokenType token_type; + + if (is_identifier_start()) { + // identifier or keyword + do { + consume(); + } while (is_identifier_middle()); + + StringView value = m_source.substring_view(value_start - 1, m_position - value_start); + auto it = s_keywords.find(value); + if (it == s_keywords.end()) { + token_type = TokenType::Identifier; + } else { + token_type = it->value; + } + } else if (isdigit(m_current_char)) { + consume(); + while (isdigit(m_current_char)) { + consume(); + } + token_type = TokenType::NumericLiteral; + } else if (m_current_char == EOF) { + token_type = TokenType::Eof; + } else { + bool found_two_char_token = false; + if (!is_eof()) { + char secondChar = m_source[m_position]; + char twoChars[] { (char)m_current_char, secondChar, 0 }; + auto it = s_two_char_tokens.find(twoChars); + if (it != s_two_char_tokens.end()) { + found_two_char_token = true; + consume(); + consume(); + token_type = it->value; + } + } + + if (!found_two_char_token) { + auto it = s_single_char_tokens.find(m_current_char); + if (it != s_single_char_tokens.end()) { + consume(); + token_type = it->value; + } else { + consume(); + token_type = TokenType::Invalid; + } + } + } + + m_current_token = Token( + token_type, + m_source.substring_view(trivia_start - 1, value_start - trivia_start), + m_source.substring_view(value_start - 1, m_position - value_start)); + + return m_current_token; +} + +} diff --git a/Libraries/LibJS/Lexer.h b/Libraries/LibJS/Lexer.h new file mode 100644 index 0000000000..01d1fdc1ce --- /dev/null +++ b/Libraries/LibJS/Lexer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "Token.h" + +#include +#include +#include + +namespace JS { + +class Lexer { +public: + explicit Lexer(StringView source); + Token next(); + +private: + void consume(); + bool is_eof() const; + bool is_identifier_start() const; + bool is_identifier_middle() const; + bool is_line_comment_start() const; + bool is_block_comment_start() const; + bool is_block_comment_end() const; + + StringView m_source; + size_t m_position = 0; + Token m_current_token; + int m_current_char; + + static HashMap s_keywords; + static HashMap s_two_char_tokens; + static HashMap s_single_char_tokens; +}; + +} diff --git a/Libraries/LibJS/Makefile b/Libraries/LibJS/Makefile index d1c7452397..8e07c0c206 100644 --- a/Libraries/LibJS/Makefile +++ b/Libraries/LibJS/Makefile @@ -5,9 +5,12 @@ OBJS = \ Heap.o \ HeapBlock.o \ Interpreter.o \ + Lexer.o \ Object.o \ + Parser.o \ PrimitiveString.o \ StringObject.o \ + Token.o \ Value.o LIBRARY = libjs.a diff --git a/Libraries/LibJS/Parser.cpp b/Libraries/LibJS/Parser.cpp new file mode 100644 index 0000000000..194bc01ddd --- /dev/null +++ b/Libraries/LibJS/Parser.cpp @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Parser.h" +#include +#include + +namespace JS { +Parser::Parser(Lexer lexer) + : m_lexer(move(lexer)) + , m_current_token(m_lexer.next()) +{ +} + +NonnullOwnPtr Parser::parse_program() +{ + auto program = make(); + while (!done()) { + if (match(TokenType::Semicolon)) { + consume(); + } else if (match_statement()) { + program->append(parse_statement()); + } else { + expected("statement"); + consume(); + } + } + return program; +} + +NonnullOwnPtr Parser::parse_statement() +{ + if (match_expression()) { + return make(parse_expression()); + } + + switch (m_current_token.type()) { + case TokenType::Function: + return parse_function_declaration(); + case TokenType::CurlyOpen: + return parse_block_statement(); + case TokenType::Return: + return parse_return_statement(); + case TokenType::Var: + return parse_variable_declaration(); + default: + m_has_errors = true; + expected("statement (missing switch case)"); + consume(); + return make(); + } +} + +NonnullOwnPtr Parser::parse_primary_expression() +{ + switch (m_current_token.type()) { + case TokenType::ParenOpen: { + consume(TokenType::ParenOpen); + auto expression = parse_expression(); + consume(TokenType::ParenClose); + return expression; + } + case TokenType::Identifier: + return make(consume().value()); + case TokenType::NumericLiteral: + return make(Value(consume().double_value())); + case TokenType::BoolLiteral: + return make(Value(consume().bool_value())); + case TokenType::CurlyOpen: + return parse_object_expression(); + default: + m_has_errors = true; + expected("primary expression (missing switch case)"); + consume(); + return make(); + } +} + +NonnullOwnPtr Parser::parse_object_expression() +{ + // FIXME: Parse actual object expression + consume(TokenType::CurlyOpen); + consume(TokenType::CurlyClose); + return make(); +} + +NonnullOwnPtr Parser::parse_expression() +{ + auto expression = parse_primary_expression(); + while (match_secondary_expression()) { + expression = parse_secondary_expression(move(expression)); + } + return expression; +} + +NonnullOwnPtr Parser::parse_secondary_expression(NonnullOwnPtr lhs) +{ + switch (m_current_token.type()) { + case TokenType::Plus: + consume(); + return make(BinaryOp::Plus, move(lhs), parse_expression()); + case TokenType::Minus: + consume(); + return make(BinaryOp::Minus, move(lhs), parse_expression()); + case TokenType::ParenOpen: + return parse_call_expression(move(lhs)); + case TokenType::Equals: + consume(); + return make(AssignmentOp::Assign, move(lhs), parse_expression()); + default: + m_has_errors = true; + expected("secondary expression (missing switch case)"); + consume(); + return make(); + } +} + +NonnullOwnPtr Parser::parse_call_expression(NonnullOwnPtr lhs) +{ + // FIXME: allow arguments + consume(TokenType::ParenOpen); + consume(TokenType::ParenClose); + + // FIXME: Allow lhs expression instead of just a string + if (lhs->is_identifier()) { + return make(static_cast(lhs.ptr())->string()); + } + + m_has_errors = true; + return make("***ERROR***"); +} + +NonnullOwnPtr Parser::parse_return_statement() +{ + consume(TokenType::Return); + if (match_expression()) { + return make(parse_expression()); + } + return make(nullptr); +} + +NonnullOwnPtr Parser::parse_block_statement() +{ + auto block = make(); + consume(TokenType::CurlyOpen); + while (!done() && !match(TokenType::CurlyClose)) { + if (match(TokenType::Semicolon)) { + consume(); + } else if (match_statement()) { + block->append(parse_statement()); + } else { + expected("statement"); + consume(); + } + } + consume(TokenType::CurlyClose); + return block; +} + +NonnullOwnPtr Parser::parse_function_declaration() +{ + consume(TokenType::Function); + auto name = consume(TokenType::Identifier).value(); + consume(TokenType::ParenOpen); + while (match(TokenType::Identifier)) { + // FIXME: actually add parameters to function + consume(TokenType::Identifier); + if (match(TokenType::ParenClose)) { + break; + } + consume(TokenType::Comma); + } + consume(TokenType::ParenClose); + auto body = parse_block_statement(); + return make(name, move(body)); +} + +NonnullOwnPtr Parser::parse_variable_declaration() +{ + consume(TokenType::Var); + auto name = consume(TokenType::Identifier).value(); + OwnPtr initializer; + if (match(TokenType::Equals)) { + consume(); + initializer = parse_expression(); + } + return make(make(name), move(initializer), DeclarationType::Var); +} + +bool Parser::match(TokenType type) const +{ + return m_current_token.type() == type; +} + +bool Parser::match_expression() const +{ + auto type = m_current_token.type(); + return type == TokenType::BoolLiteral + || type == TokenType::NumericLiteral + || type == TokenType::StringLiteral + || type == TokenType::NullLiteral + || type == TokenType::Identifier + || type == TokenType::New + || type == TokenType::CurlyOpen + || type == TokenType::BracketOpen + || type == TokenType::ParenOpen; +} + +bool Parser::match_secondary_expression() const +{ + auto type = m_current_token.type(); + return type == TokenType::Plus + || type == TokenType::Minus + || type == TokenType::Asterisk + || type == TokenType::Slash + || type == TokenType::Equals + || type == TokenType::ParenOpen; +} + +bool Parser::match_statement() const +{ + auto type = m_current_token.type(); + return match_expression() + || type == TokenType::Function + || type == TokenType::Return + || type == TokenType::Let + || type == TokenType::Catch + || type == TokenType::Class + || type == TokenType::Delete + || type == TokenType::Do + || type == TokenType::If + || type == TokenType::Try + || type == TokenType::While + || type == TokenType::Const + || type == TokenType::CurlyOpen + || type == TokenType::Var; +} + +bool Parser::done() const +{ + return match(TokenType::Eof); +} + +Token Parser::consume() +{ + auto oldToken = m_current_token; + m_current_token = m_lexer.next(); + return oldToken; +} + +Token Parser::consume(TokenType type) +{ + if (m_current_token.type() != type) { + m_has_errors = true; + fprintf(stderr, "Error: Unexpected token %s. Expected %s\n", m_current_token.name(), Token::name(type)); + } + return consume(); +} + +void Parser::expected(const char* what) +{ + m_has_errors = true; + fprintf(stderr, "Error: Unexpected token %s. Expected %s\n", m_current_token.name(), what); +} + +} diff --git a/Libraries/LibJS/Parser.h b/Libraries/LibJS/Parser.h new file mode 100644 index 0000000000..1800004c16 --- /dev/null +++ b/Libraries/LibJS/Parser.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "AST.h" +#include "Lexer.h" +#include + +namespace JS { +class Parser { +public: + explicit Parser(Lexer lexer); + + NonnullOwnPtr parse_program(); + + NonnullOwnPtr parse_statement(); + NonnullOwnPtr parse_block_statement(); + NonnullOwnPtr parse_return_statement(); + NonnullOwnPtr parse_function_declaration(); + NonnullOwnPtr parse_variable_declaration(); + + NonnullOwnPtr parse_expression(); + NonnullOwnPtr parse_primary_expression(); + NonnullOwnPtr parse_object_expression(); + NonnullOwnPtr parse_secondary_expression(NonnullOwnPtr); + NonnullOwnPtr parse_call_expression(NonnullOwnPtr); + + bool has_errors() const { return m_has_errors; } + +private: + bool match_expression() const; + bool match_secondary_expression() const; + bool match_statement() const; + bool match(TokenType type) const; + bool done() const; + void expected(const char* what); + Token consume(); + Token consume(TokenType type); + + Lexer m_lexer; + Token m_current_token; + bool m_has_errors = false; +}; +} diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp new file mode 100644 index 0000000000..3bf3d3e7bd --- /dev/null +++ b/Libraries/LibJS/Token.cpp @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Token.h" + +namespace JS { + +const char* Token::name(TokenType type) +{ + switch (type) { + case TokenType::Ampersand: + return "Ampersand"; + case TokenType::AmpersandEquals: + return "AmpersandEquals"; + case TokenType::Asterisk: + return "Asterisk"; + case TokenType::AsteriskEquals: + return "AsteriskEquals"; + case TokenType::BoolLiteral: + return "BoolLiteral"; + case TokenType::BracketOpen: + return "BracketOpen"; + case TokenType::BracketClose: + return "BracketClose"; + case TokenType::Catch: + return "Catch"; + case TokenType::Class: + return "Class"; + case TokenType::Comma: + return "Comma"; + case TokenType::Const: + return "Const"; + case TokenType::CurlyClose: + return "CurlyClose"; + case TokenType::CurlyOpen: + return "CurlyOpen"; + case TokenType::Delete: + return "Delete"; + case TokenType::Do: + return "Do"; + case TokenType::DoubleAmpersand: + return "DoubleAmpersand"; + case TokenType::DoublePipe: + return "DoublePipe"; + case TokenType::Else: + return "Else"; + case TokenType::Eof: + return "Eof"; + case TokenType::Equals: + return "Equals"; + case TokenType::EqualsEquals: + return "EqualsEquals"; + case TokenType::ExclamationMark: + return "ExclamationMark"; + case TokenType::ExclamationMarkEquals: + return "ExclamationMarkEquals"; + case TokenType::Finally: + return "Finally"; + case TokenType::Function: + return "Function"; + case TokenType::GreaterThan: + return "GreaterThan"; + case TokenType::Identifier: + return "Identifier"; + case TokenType::If: + return "If"; + case TokenType::Interface: + return "Interface"; + case TokenType::Invalid: + return "Invalid"; + case TokenType::LessThan: + return "LessThan"; + case TokenType::Let: + return "Let"; + case TokenType::Minus: + return "Minus"; + case TokenType::MinusEquals: + return "MinusEquals"; + case TokenType::MinusMinus: + return "MinusMinus"; + case TokenType::New: + return "New"; + case TokenType::NullLiteral: + return "NullLiteral"; + case TokenType::NumericLiteral: + return "NumericLiteral"; + case TokenType::ParenClose: + return "ParenClose"; + case TokenType::ParenOpen: + return "ParenOpen"; + case TokenType::Percent: + return "Percent"; + case TokenType::PercentEquals: + return "PercentEquals"; + case TokenType::Period: + return "Period"; + case TokenType::Pipe: + return "Pipe"; + case TokenType::PipeEquals: + return "PipeEquals"; + case TokenType::Plus: + return "Plus"; + case TokenType::PlusEquals: + return "PlusEquals"; + case TokenType::PlusPlus: + return "PlusPlus"; + case TokenType::QuestionMark: + return "QuestionMark"; + case TokenType::RegexLiteral: + return "RegexLiteral"; + case TokenType::Return: + return "Return"; + case TokenType::Semicolon: + return "Semicolon"; + case TokenType::ShiftLeft: + return "ShiftLeft"; + case TokenType::ShiftRight: + return "ShiftRight"; + case TokenType::Slash: + return "Slash"; + case TokenType::SlashEquals: + return "SlashEquals"; + case TokenType::StringLiteral: + return "StringLiteral"; + case TokenType::Try: + return "Try"; + case TokenType::Var: + return "Var"; + case TokenType::While: + return "While"; + default: + return ""; + } +} + +const char* Token::name() const +{ + return name(m_type); +} + +double Token::double_value() const +{ + // FIXME: need to parse double instead of int + bool ok; + return m_value.to_int(ok); +} + +String Token::string_value() const +{ + // FIXME: unescape the string and remove quotes + return m_value; +} + +bool Token::bool_value() const +{ + return m_value == "true"; +} + +} diff --git a/Libraries/LibJS/Token.h b/Libraries/LibJS/Token.h new file mode 100644 index 0000000000..e7f6282304 --- /dev/null +++ b/Libraries/LibJS/Token.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include +#include + +namespace JS { + +enum class TokenType { + Ampersand, + AmpersandEquals, + Asterisk, + AsteriskEquals, + BoolLiteral, + BracketClose, + BracketOpen, + Catch, + Class, + Comma, + Const, + CurlyClose, + CurlyOpen, + Delete, + Do, + DoubleAmpersand, + DoublePipe, + Else, + Eof, + Equals, + EqualsEquals, + ExclamationMark, + ExclamationMarkEquals, + Finally, + Function, + GreaterThan, + Identifier, + If, + Interface, + Invalid, + LessThan, + Let, + Minus, + MinusEquals, + MinusMinus, + New, + NullLiteral, + NumericLiteral, + ParenClose, + ParenOpen, + Percent, + PercentEquals, + Period, + Pipe, + PipeEquals, + Plus, + PlusEquals, + PlusPlus, + QuestionMark, + RegexLiteral, + Return, + Semicolon, + ShiftLeft, + ShiftRight, + Slash, + SlashEquals, + StringLiteral, + Try, + Var, + While +}; + +class Token { +public: + Token(TokenType type, StringView trivia, StringView value) + : m_type(type) + , m_trivia(trivia) + , m_value(value) + { + } + + TokenType type() const { return m_type; } + const char* name() const; + static const char* name(TokenType); + + const StringView& trivia() const { return m_trivia; } + const StringView& value() const { return m_value; } + double double_value() const; + String string_value() const; + bool bool_value() const; + +private: + TokenType m_type; + StringView m_trivia; + StringView m_value; +}; + +} diff --git a/Userland/js.cpp b/Userland/js.cpp index cc6e09a2ad..6c919fdb6b 100644 --- a/Userland/js.cpp +++ b/Userland/js.cpp @@ -28,20 +28,20 @@ #include #include #include +#include #include #include #include -#define PROGRAM 4 +#define PROGRAM 6 -static void build_program(JS::Program&, JS::Heap&); +static NonnullOwnPtr build_program(JS::Heap&); int main() { JS::Interpreter interpreter; - auto program = make(); - build_program(*program, interpreter.heap()); + auto program = build_program(interpreter.heap()); program->dump(0); auto result = interpreter.run(*program); @@ -55,7 +55,7 @@ int main() } #if PROGRAM == 1 -void build_program(JS::Program& program, JS::Heap&) +NonnullOwnPtr build_program(JS::Heap&) { // function foo() { return (1 + 2) + 3; } // foo(); @@ -70,11 +70,13 @@ void build_program(JS::Program& program, JS::Heap&) make(JS::Value(2))), make(JS::Value(3)))); - program.append("foo", move(block)); - program.append("foo"); + auto program = make(); + program->append("foo", move(block)); + program->append(make("foo")); + return program; } #elif PROGRAM == 2 -void build_program(JS::Program& program, JS::Heap&) +NonnullOwnPtr build_program(JS::Heap&) { // c = 1; // function foo() { @@ -84,10 +86,11 @@ void build_program(JS::Program& program, JS::Heap&) // } // foo(); - program.append( + auto program = make(); + program->append(make( JS::AssignmentOp::Assign, make("c"), - make(JS::Value(1))); + make(JS::Value(1)))); auto block = make(); block->append( @@ -107,11 +110,14 @@ void build_program(JS::Program& program, JS::Heap&) make("a"), make("b")), make("c"))); - program.append("foo", move(block)); - program.append("foo"); + + program->append("foo", move(block)); + program->append(make("foo")); + + return program; } #elif PROGRAM == 3 -void build_program(JS::Program& program, JS::Heap&) +NonnullOwnPtr build_program(JS::Heap&) { // function foo() { // var x = {}; @@ -124,13 +130,15 @@ void build_program(JS::Program& program, JS::Heap&) make("x"), make(), JS::DeclarationType::Var); - block->append("$gc"); + block->append(make("$gc")); - program.append("foo", move(block)); - program.append("foo"); + auto program = make(); + program->append("foo", move(block)); + program->append(make("foo")); + return program; } #elif PROGRAM == 4 -void build_program(JS::Program& program, JS::Heap&) +NonnullOwnPtr build_program(JS::Heap&) { // function foo() { // function bar() { @@ -147,19 +155,38 @@ void build_program(JS::Program& program, JS::Heap&) auto block_foo = make(); block_foo->append("bar", move(block_bar)); - block_foo->append("bar"); + block_foo->append(make("bar")); block_foo->append(make("y")); - program.append("foo", move(block_foo)); - program.append("foo"); + auto program = make(); + program->append("foo", move(block_foo)); + program->append(make("foo")); + return program; } #elif PROGRAM == 5 -void build_program(JS::Program& program, JS::Heap& heap) +NonnullOwnPtr build_program(JS::Heap& heap) { // "hello friends".length - program.append( + auto program = make(); + program->append(make( make(JS::Value(js_string(heap, "hello friends"))), - make("length")); + make("length"))); + + return program; +} +#elif PROGRAM == 6 +NonnullOwnPtr build_program(JS::Heap&) +{ + const char* source = "var foo = 1;\n" + "function bar() {\n" + " return 38;\n" + "}\n" + "foo = {};\n" + "foo = bar() + 4;\n" + "foo;\n"; + + auto parser = JS::Parser(JS::Lexer(source)); + return parser.parse_program(); } #endif