LibJS: Parse slashes after reserved identifiers correctly

Previously we were unable to parse code like `yield/2` because `/2` was parsed as a regex. At the same time `for (a in / b/)` was parsed as a division. This is solved by defaulting to division in the lexer, but calling `force_slash_as_regex()` from the parser whenever an IdentifierName is parsed as a ReservedWord.
2025-07-23 20:17:42 +00:00 · 2023-05-28 00:08:52 +02:00 · 2023-05-28 00:08:52 +02:00 · a2efecac03
commit a2efecac03
parent b0bd1e5eb5
6 changed files with 65 additions and 25 deletions
--- a/Userland/Libraries/LibJS/Lexer.cpp
+++ b/Userland/Libraries/LibJS/Lexer.cpp
@ -500,23 +500,18 @@ bool Lexer::is_numeric_literal_start() const
 bool Lexer::slash_means_division() const
 {
    auto type = m_current_token.type();
-    return type == TokenType::BigIntLiteral
+    return m_current_token.is_identifier_name()
-        || type == TokenType::BoolLiteral
+        || type == TokenType::BigIntLiteral
        || type == TokenType::BracketClose
        || type == TokenType::CurlyClose
-        || type == TokenType::Identifier
-        || type == TokenType::In
-        || type == TokenType::Instanceof
        || type == TokenType::MinusMinus
-        || type == TokenType::NullLiteral
        || type == TokenType::NumericLiteral
        || type == TokenType::ParenClose
        || type == TokenType::PlusPlus
        || type == TokenType::PrivateIdentifier
        || type == TokenType::RegexLiteral
        || type == TokenType::StringLiteral
-        || type == TokenType::TemplateLiteralEnd
+        || type == TokenType::TemplateLiteralEnd;
-        || type == TokenType::This;
 }
 Token Lexer::next()
--- a/Userland/Libraries/LibJS/Parser.cpp
+++ b/Userland/Libraries/LibJS/Parser.cpp
@ -1458,11 +1458,11 @@ Parser::PrimaryExpressionParseResult Parser::parse_primary_expression()
    case TokenType::BigIntLiteral:
        return { create_ast_node<BigIntLiteral>({ m_source_code, rule_start.position(), position() }, consume().value()) };
    case TokenType::BoolLiteral:
-        return { create_ast_node<BooleanLiteral>({ m_source_code, rule_start.position(), position() }, consume().bool_value()) };
+        return { create_ast_node<BooleanLiteral>({ m_source_code, rule_start.position(), position() }, consume_and_allow_division().bool_value()) };
    case TokenType::StringLiteral:
        return { parse_string_literal(consume()) };
    case TokenType::NullLiteral:
-        consume();
+        consume_and_allow_division();
        return { create_ast_node<NullLiteral>({ m_source_code, rule_start.position(), position() }) };
    case TokenType::CurlyOpen:
        return { parse_object_expression() };
@ -2168,7 +2168,7 @@ Parser::ExpressionResult Parser::parse_secondary_expression(NonnullRefPtr<Expres
            expected("IdentifierName");
        }
-        return create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), create_ast_node<Identifier>({ m_source_code, rule_start.position(), position() }, consume().DeprecatedFlyString_value()));
+        return create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), create_ast_node<Identifier>({ m_source_code, rule_start.position(), position() }, consume_and_allow_division().DeprecatedFlyString_value()));
    case TokenType::BracketOpen: {
        consume(TokenType::BracketOpen);
        auto expression = create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), parse_expression(0), true);
@ -4010,6 +4010,18 @@ bool Parser::done() const
 }
 Token Parser::consume()
+{
+    auto old_token = m_state.current_token;
+    m_state.current_token = m_state.lexer.next();
+    // If an IdentifierName is not parsed as an Identifier a slash after it should not be a division
+    if (old_token.is_identifier_name() && (m_state.current_token.type() == TokenType::Slash || m_state.current_token.type() == TokenType::SlashEquals)) {
+        m_state.current_token = m_state.lexer.force_slash_as_regex();
+    }
+    return old_token;
+}
+Token Parser::consume_and_allow_division()
 {
    auto old_token = m_state.current_token;
    m_state.current_token = m_state.lexer.next();
@ -4057,26 +4069,26 @@ Token Parser::consume_identifier()
    if (match(TokenType::Let)) {
        if (m_state.strict_mode)
            syntax_error("'let' is not allowed as an identifier in strict mode");
-        return consume();
+        return consume_and_allow_division();
    }
    if (match(TokenType::Yield)) {
        if (m_state.strict_mode || m_state.in_generator_function_context)
            syntax_error("Identifier must not be a reserved word in strict mode ('yield')");
-        return consume();
+        return consume_and_allow_division();
    }
    if (match(TokenType::Await)) {
        if (m_program_type == Program::Type::Module || m_state.await_expression_is_valid || m_state.in_class_static_init_block)
            syntax_error("Identifier must not be a reserved word in modules ('await')");
-        return consume();
+        return consume_and_allow_division();
    }
    if (match(TokenType::Async))
-        return consume();
+        return consume_and_allow_division();
    expected("Identifier");
-    return consume();
+    return consume_and_allow_division();
 }
 // https://tc39.es/ecma262/#prod-IdentifierReference
@ -4092,33 +4104,33 @@ Token Parser::consume_identifier_reference()
        if (m_program_type == Program::Type::Module && name == "await"sv)
            syntax_error("'await' is not allowed as an identifier in module");
-        return consume();
+        return consume_and_allow_division();
    }
    // See note in Parser::parse_identifier().
    if (match(TokenType::Let)) {
        if (m_state.strict_mode)
            syntax_error("'let' is not allowed as an identifier in strict mode");
-        return consume();
+        return consume_and_allow_division();
    }
    if (match(TokenType::Yield)) {
        if (m_state.strict_mode)
            syntax_error("Identifier reference may not be 'yield' in strict mode");
-        return consume();
+        return consume_and_allow_division();
    }
    if (match(TokenType::Await)) {
        if (m_program_type == Program::Type::Module)
            syntax_error("'await' is not allowed as an identifier in module");
-        return consume();
+        return consume_and_allow_division();
    }
    if (match(TokenType::Async))
-        return consume();
+        return consume_and_allow_division();
    expected(Token::name(TokenType::Identifier));
-    return consume();
+    return consume_and_allow_division();
 }
 Token Parser::consume(TokenType expected_type)
@ -4126,7 +4138,7 @@ Token Parser::consume(TokenType expected_type)
    if (!match(expected_type)) {
        expected(Token::name(expected_type));
    }
-    auto token = consume();
+    auto token = expected_type == TokenType::Identifier ? consume_and_allow_division() : consume();
    if (expected_type == TokenType::Identifier) {
        if (m_state.strict_mode && is_strict_reserved_word(token.value()))
            syntax_error(DeprecatedString::formatted("Identifier must not be a reserved word in strict mode ('{}')", token.value()));
--- a/Userland/Libraries/LibJS/Parser.h
+++ b/Userland/Libraries/LibJS/Parser.h
@ -244,6 +244,7 @@ private:
    void expected(char const* what);
    void syntax_error(DeprecatedString const& message, Optional<Position> = {});
    Token consume();
+    Token consume_and_allow_division();
    Token consume_identifier();
    Token consume_identifier_reference();
    Token consume(TokenType type);
--- a/Userland/Libraries/LibJS/Tests/modules/basic-modules.js
+++ b/Userland/Libraries/LibJS/Tests/modules/basic-modules.js
@ -210,6 +210,12 @@ describe("in- and exports", () => {
    test("can have top level using declarations which trigger at the end of running a module", () => {
        expectModulePassed("./top-level-dispose.mjs");
    });
+    test("can export default a RegExp", () => {
+        const result = expectModulePassed("./default-regexp-export.mjs");
+        expect(result.default).toBeInstanceOf(RegExp);
+        expect(result.default.toString()).toBe(/foo/.toString());
+    });
 });
 describe("loops", () => {
--- a/Userland/Libraries/LibJS/Tests/modules/default-regexp-export.mjs
+++ b/Userland/Libraries/LibJS/Tests/modules/default-regexp-export.mjs
@ -0,0 +1,3 @@
+export default /foo/;
+
+export let passed = true;
--- a/Userland/Libraries/LibJS/Tests/syntax/slash-after-block.js
+++ b/Userland/Libraries/LibJS/Tests/syntax/slash-after-block.js
@ -19,7 +19,30 @@ test("slash token resolution in lexer", () => {
    expect("a.instanceof / b").toEval();
    expect("class A { #name; d = a.#name / b; }").toEval();
-    // FIXME: Even more 'reserved' words are valid however the cases below do still need to pass.
+    expect("async / b").toEval();
-    //expect("a.void / b").toEval();
+    expect("a.delete / b").toEval();
+    expect("delete / b/").toEval();
+    expect("a.in / b").toEval();
+    expect("for (a in / b/) {}").toEval();
+    expect("a.instanceof / b").toEval();
+    expect("a instanceof / b/").toEval();
+    expect("new / b/").toEval();
+    expect("null / b").toEval();
+    expect("for (a of / b/) {}").toEval();
+    expect("a.return / b").toEval();
+    expect("function foo() { return / b/ }").toEval();
+    expect("throw / b/").toEval();
+    expect("a.typeof / b").toEval();
+    expect("a.void / b").toEval();
+    expect("void / b/").toEval();
+    expect("await / b").toEval();
+    expect("await / b/").not.toEval();
+    expect("async function foo() { await / b }").not.toEval();
+    expect("async function foo() { await / b/ }").toEval();
+    expect("yield / b").toEval();
+    expect("yield / b/").not.toEval();
+    expect("function* foo() { yield / b }").not.toEval();
+    expect("function* foo() { yield / b/ }").toEval();
 });
		`@ -0,0 +1,3 @@`
							`export default /foo/;`

							`export let passed = true;`