diff --git a/Userland/Libraries/LibJS/Lexer.cpp b/Userland/Libraries/LibJS/Lexer.cpp
index 7ac968e76b..9ae60a7e77 100644
--- a/Userland/Libraries/LibJS/Lexer.cpp
+++ b/Userland/Libraries/LibJS/Lexer.cpp
@@ -360,10 +360,15 @@ bool Lexer::slash_means_division() const
     return type == TokenType::BigIntLiteral
         || type == TokenType::BoolLiteral
         || type == TokenType::BracketClose
+        || type == TokenType::CurlyClose
         || type == TokenType::Identifier
+        || type == TokenType::In
+        || type == TokenType::Instanceof
+        || type == TokenType::MinusMinus
         || type == TokenType::NullLiteral
         || type == TokenType::NumericLiteral
         || type == TokenType::ParenClose
+        || type == TokenType::PlusPlus
         || type == TokenType::RegexLiteral
         || type == TokenType::StringLiteral
         || type == TokenType::TemplateLiteralEnd
@@ -563,27 +568,7 @@ Token Lexer::next()
         }
     } else if (m_current_char == '/' && !slash_means_division()) {
         consume();
-        token_type = TokenType::RegexLiteral;
-
-        while (!is_eof()) {
-            if (m_current_char == '[') {
-                m_regex_is_in_character_class = true;
-            } else if (m_current_char == ']') {
-                m_regex_is_in_character_class = false;
-            } else if (!m_regex_is_in_character_class && m_current_char == '/') {
-                break;
-            }
-
-            if (match('\\', '/') || match('\\', '[') || match('\\', '\\') || (m_regex_is_in_character_class && match('\\', ']')))
-                consume();
-            consume();
-        }
-
-        if (is_eof()) {
-            token_type = TokenType::UnterminatedRegexLiteral;
-        } else {
-            consume();
-        }
+        token_type = consume_regex_literal();
     } else if (m_eof) {
         if (unterminated_comment) {
             token_type = TokenType::Invalid;
@@ -677,4 +662,84 @@ Token Lexer::next()
     return m_current_token;
 }
 
+// Re-lexes the current Slash or SlashEquals token as a regex literal and makes the
+// result the current token. Meant for callers that know from context that a regex
+// starts here even though the lexer produced a division token.
+Token Lexer::force_slash_as_regex()
+{
+    VERIFY(m_current_token.type() == TokenType::Slash || m_current_token.type() == TokenType::SlashEquals);
+
+    bool has_equals = m_current_token.type() == TokenType::SlashEquals;
+
+    VERIFY(m_position > 0);
+    size_t value_start = m_position - 1;
+
+    if (has_equals) {
+        // The '=' of "/=" lies just behind the current position; step back one character
+        // so that it is lexed again as the first character of the regex body.
+        VERIFY(m_source[value_start - 1] == '=');
+        --value_start;
+        --m_position;
+        m_current_char = '=';
+    }
+
+    TokenType token_type = consume_regex_literal();
+
+    // The replacement token keeps the trivia and line/column of the token it replaces.
+    // Its value begins one character before value_start so the opening '/' is included.
+    m_current_token = Token(
+        token_type,
+        "",
+        m_current_token.trivia(),
+        m_source.substring_view(value_start - 1, m_position - value_start),
+        m_filename,
+        m_current_token.line_number(),
+        m_current_token.line_column(),
+        m_position);
+
+    if constexpr (LEXER_DEBUG) {
+        dbgln("------------------------------");
+        dbgln("Token: {}", m_current_token.name());
+        dbgln("Trivia: _{}_", m_current_token.trivia());
+        dbgln("Value: _{}_", m_current_token.value());
+        dbgln("Line: {}, Column: {}", m_current_token.line_number(), m_current_token.line_column());
+        dbgln("------------------------------");
+    }
+
+    return m_current_token;
+}
+
+// Consumes the body of a regex literal, starting just after the opening '/'.
+//
+// Scanning stops at end of input, at a line terminator, or at a '/' that is not
+// inside a [...] character class. Returns RegexLiteral (after also consuming the
+// closing '/') when such a '/' was found, and UnterminatedRegexLiteral otherwise.
+TokenType Lexer::consume_regex_literal()
+{
+    while (!is_eof()) {
+        if (is_line_terminator() || (!m_regex_is_in_character_class && m_current_char == '/'))
+            break;
+
+        if (m_current_char == '[')
+            m_regex_is_in_character_class = true;
+        else if (m_current_char == ']')
+            m_regex_is_in_character_class = false;
+
+        // Escapes: match(a, b) presumably tests the current and the next character. For a
+        // backslash followed by '/', '[' or another backslash (or by ']' inside a character
+        // class), consume the backslash here so the consume() below swallows the escaped
+        // character before the checks above can act on it.
+        if (match('\\', '/') || match('\\', '[') || match('\\', '\\') || (m_regex_is_in_character_class && match('\\', ']')))
+            consume();
+        consume();
+    }
+
+    if (m_current_char == '/') {
+        consume();
+        return TokenType::RegexLiteral;
+    }
+
+    return TokenType::UnterminatedRegexLiteral;
+}
+
 }
diff --git a/Userland/Libraries/LibJS/Lexer.h b/Userland/Libraries/LibJS/Lexer.h
index b301e5aebd..3117ee15a5 100644
--- a/Userland/Libraries/LibJS/Lexer.h
+++ b/Userland/Libraries/LibJS/Lexer.h
@@ -25,6 +25,8 @@ public:
 
     void disallow_html_comments() { m_allow_html_comments = false; };
 
+    Token force_slash_as_regex();
+
 private:
     void consume();
     bool consume_exponent();
@@ -47,6 +49,8 @@ private:
     bool match_numeric_literal_separator_followed_by(Callback) const;
     bool slash_means_division() const;
 
+    TokenType consume_regex_literal();
+
     StringView m_source;
     size_t m_position { 0 };
     Token m_current_token;
diff --git a/Userland/Libraries/LibJS/Parser.cpp b/Userland/Libraries/LibJS/Parser.cpp
index 2c99568c9e..cd9949d7dd 100644
--- a/Userland/Libraries/LibJS/Parser.cpp
+++ b/Userland/Libraries/LibJS/Parser.cpp
@@ -400,6 +400,10 @@ NonnullRefPtr Parser::parse_statement(AllowLabelledFunction allow_lab
     case TokenType::Semicolon:
         consume();
         return create_ast_node({ m_state.current_token.filename(), rule_start.position(), position() });
+    case TokenType::Slash:
+    case TokenType::SlashEquals:
+        m_state.current_token = m_state.lexer.force_slash_as_regex();
+        [[fallthrough]];
     default:
         if (match_identifier_name()) {
             auto result = try_parse_labelled_statement(allow_labelled_function);
@@ -2556,6 +2560,8 @@ bool Parser::match_expression() const
         || type == TokenType::This
         || type == TokenType::Super
         || type == TokenType::RegexLiteral
+        || type == TokenType::Slash // Wrongly recognized regex by lexer
+        || type == TokenType::SlashEquals // Wrongly recognized regex by lexer (/=a/ is a valid regex)
         || type == TokenType::Yield
         || match_unary_prefixed_expression();
 }
diff --git a/Userland/Libraries/LibJS/Tests/syntax/slash-after-block.js b/Userland/Libraries/LibJS/Tests/syntax/slash-after-block.js
index 67839147f9..c3c9771f1a 100644
--- a/Userland/Libraries/LibJS/Tests/syntax/slash-after-block.js
+++ b/Userland/Libraries/LibJS/Tests/syntax/slash-after-block.js
@@ -3,4 +3,22 @@ test("slash token resolution in lexer", () => {
     expect("``/foo/").not.toEval();
     expect("1/foo/").not.toEval();
     expect("1/foo").toEval();
+
+    expect("{} /foo/").toEval();
+    expect("{} /=/").toEval();
+    expect("{} /=a/").toEval();
+    expect("{} /* */ /=a/").toEval();
+    expect("{} /* /a/ */ /=a/").toEval();
+
+    expect("(function () {} / 1)").toEval();
+    expect("(function () {} / 1)").toEval();
+
+    expect("+a++ / 1").toEval();
+    expect("+a-- / 1").toEval();
+    expect("a.in / b").toEval();
+    expect("a.instanceof / b").toEval();
+
+    // FIXME: Even more 'reserved' words are valid however the cases below do still need to pass.
+    //expect("a.void / b").toEval();
+    expect("void / b/").toEval();
 });