1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-23 06:57:42 +00:00

LibJS: Parse slashes after reserved identifiers correctly

Previously, we were unable to parse code like `yield/2` because `/2`
was lexed as the start of a regex literal. At the same time, the slash
in `for (a in / b/)` was parsed as a division.

This is solved by defaulting to division in the lexer, but calling
`force_slash_as_regex()` from the parser whenever an IdentifierName
is parsed as a ReservedWord.
This commit is contained in:
Simon Wanner 2023-05-28 00:08:52 +02:00 committed by Andreas Kling
parent b0bd1e5eb5
commit a2efecac03
6 changed files with 65 additions and 25 deletions

View file

@ -500,23 +500,18 @@ bool Lexer::is_numeric_literal_start() const
// Decides, from the most recently lexed token, whether a following '/' should be treated
// as a division operator rather than as the start of a regex literal.
// NOTE(review): this hunk is rendered without +/- markers, so the removed and the added
// lines of the return expression appear interleaved below (two `return` lines, two
// terminating `;`). Confirm the final expression against the real Lexer.cpp before relying on it.
bool Lexer::slash_means_division() const
{
auto type = m_current_token.type();
return type == TokenType::BigIntLiteral
|| type == TokenType::BoolLiteral
return m_current_token.is_identifier_name()
|| type == TokenType::BigIntLiteral
|| type == TokenType::BracketClose
|| type == TokenType::CurlyClose
|| type == TokenType::Identifier
|| type == TokenType::In
|| type == TokenType::Instanceof
|| type == TokenType::MinusMinus
|| type == TokenType::NullLiteral
|| type == TokenType::NumericLiteral
|| type == TokenType::ParenClose
|| type == TokenType::PlusPlus
|| type == TokenType::PrivateIdentifier
|| type == TokenType::RegexLiteral
|| type == TokenType::StringLiteral
|| type == TokenType::TemplateLiteralEnd
|| type == TokenType::This;
|| type == TokenType::TemplateLiteralEnd;
}
Token Lexer::next()

View file

@ -1458,11 +1458,11 @@ Parser::PrimaryExpressionParseResult Parser::parse_primary_expression()
case TokenType::BigIntLiteral:
return { create_ast_node<BigIntLiteral>({ m_source_code, rule_start.position(), position() }, consume().value()) };
case TokenType::BoolLiteral:
return { create_ast_node<BooleanLiteral>({ m_source_code, rule_start.position(), position() }, consume().bool_value()) };
return { create_ast_node<BooleanLiteral>({ m_source_code, rule_start.position(), position() }, consume_and_allow_division().bool_value()) };
case TokenType::StringLiteral:
return { parse_string_literal(consume()) };
case TokenType::NullLiteral:
consume();
consume_and_allow_division();
return { create_ast_node<NullLiteral>({ m_source_code, rule_start.position(), position() }) };
case TokenType::CurlyOpen:
return { parse_object_expression() };
@ -2168,7 +2168,7 @@ Parser::ExpressionResult Parser::parse_secondary_expression(NonnullRefPtr<Expres
expected("IdentifierName");
}
return create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), create_ast_node<Identifier>({ m_source_code, rule_start.position(), position() }, consume().DeprecatedFlyString_value()));
return create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), create_ast_node<Identifier>({ m_source_code, rule_start.position(), position() }, consume_and_allow_division().DeprecatedFlyString_value()));
case TokenType::BracketOpen: {
consume(TokenType::BracketOpen);
auto expression = create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), parse_expression(0), true);
@ -4010,6 +4010,18 @@ bool Parser::done() const
}
// Advances the parser to the next token and returns the token that was current before the call.
// When the consumed token is an IdentifierName and the freshly lexed token is '/' or '/=',
// the lexer is asked to re-read that slash as the start of a regex: an IdentifierName that is
// consumed here was not parsed as an Identifier, so a slash after it should not be a division.
Token Parser::consume()
{
    auto consumed_token = m_state.current_token;
    m_state.current_token = m_state.lexer.next();

    // Only IdentifierName tokens can flip the meaning of a following slash.
    if (!consumed_token.is_identifier_name())
        return consumed_token;

    auto const following_type = m_state.current_token.type();
    bool const follows_with_slash = following_type == TokenType::Slash || following_type == TokenType::SlashEquals;
    if (follows_with_slash)
        m_state.current_token = m_state.lexer.force_slash_as_regex();

    return consumed_token;
}
Token Parser::consume_and_allow_division()
{
auto old_token = m_state.current_token;
m_state.current_token = m_state.lexer.next();
@ -4057,26 +4069,26 @@ Token Parser::consume_identifier()
if (match(TokenType::Let)) {
if (m_state.strict_mode)
syntax_error("'let' is not allowed as an identifier in strict mode");
return consume();
return consume_and_allow_division();
}
if (match(TokenType::Yield)) {
if (m_state.strict_mode || m_state.in_generator_function_context)
syntax_error("Identifier must not be a reserved word in strict mode ('yield')");
return consume();
return consume_and_allow_division();
}
if (match(TokenType::Await)) {
if (m_program_type == Program::Type::Module || m_state.await_expression_is_valid || m_state.in_class_static_init_block)
syntax_error("Identifier must not be a reserved word in modules ('await')");
return consume();
return consume_and_allow_division();
}
if (match(TokenType::Async))
return consume();
return consume_and_allow_division();
expected("Identifier");
return consume();
return consume_and_allow_division();
}
// https://tc39.es/ecma262/#prod-IdentifierReference
@ -4092,33 +4104,33 @@ Token Parser::consume_identifier_reference()
if (m_program_type == Program::Type::Module && name == "await"sv)
syntax_error("'await' is not allowed as an identifier in module");
return consume();
return consume_and_allow_division();
}
// See note in Parser::parse_identifier().
if (match(TokenType::Let)) {
if (m_state.strict_mode)
syntax_error("'let' is not allowed as an identifier in strict mode");
return consume();
return consume_and_allow_division();
}
if (match(TokenType::Yield)) {
if (m_state.strict_mode)
syntax_error("Identifier reference may not be 'yield' in strict mode");
return consume();
return consume_and_allow_division();
}
if (match(TokenType::Await)) {
if (m_program_type == Program::Type::Module)
syntax_error("'await' is not allowed as an identifier in module");
return consume();
return consume_and_allow_division();
}
if (match(TokenType::Async))
return consume();
return consume_and_allow_division();
expected(Token::name(TokenType::Identifier));
return consume();
return consume_and_allow_division();
}
Token Parser::consume(TokenType expected_type)
@ -4126,7 +4138,7 @@ Token Parser::consume(TokenType expected_type)
if (!match(expected_type)) {
expected(Token::name(expected_type));
}
auto token = consume();
auto token = expected_type == TokenType::Identifier ? consume_and_allow_division() : consume();
if (expected_type == TokenType::Identifier) {
if (m_state.strict_mode && is_strict_reserved_word(token.value()))
syntax_error(DeprecatedString::formatted("Identifier must not be a reserved word in strict mode ('{}')", token.value()));

View file

@ -244,6 +244,7 @@ private:
void expected(char const* what);
void syntax_error(DeprecatedString const& message, Optional<Position> = {});
// Consumes the current token and returns it. If that token is an IdentifierName and the
// next token is '/' or '/=', the slash is re-lexed via Lexer::force_slash_as_regex().
Token consume();
// Consumes the current token and returns it.
// NOTE(review): presumably leaves a following '/' as lexed (division) — only the first two
// lines of its definition are visible in this view; confirm against Parser.cpp.
Token consume_and_allow_division();
Token consume_identifier();
Token consume_identifier_reference();
Token consume(TokenType type);

View file

@ -210,6 +210,12 @@ describe("in- and exports", () => {
test("can have top level using declarations which trigger at the end of running a module", () => {
expectModulePassed("./top-level-dispose.mjs");
});
// The fixture's default export must be a RegExp whose string form matches that of /foo/.
test("can export default a RegExp", () => {
    const imported = expectModulePassed("./default-regexp-export.mjs");
    const defaultExport = imported.default;
    expect(defaultExport).toBeInstanceOf(RegExp);
    expect(defaultExport.toString()).toBe(/foo/.toString());
});
});
describe("loops", () => {

View file

@ -0,0 +1,3 @@
// Fixture: the default export is a regex literal, so the '/' directly after the `default`
// keyword has to be lexed as the start of a regex rather than as a division operator.
export default /foo/;
// NOTE(review): presumably read by the module test helper to detect that the module ran
// to completion — confirm against expectModulePassed.
export let passed = true;

View file

@ -19,7 +19,30 @@ test("slash token resolution in lexer", () => {
expect("a.instanceof / b").toEval();
expect("class A { #name; d = a.#name / b; }").toEval();
// FIXME: Even more 'reserved' words are valid however the cases below do still need to pass.
//expect("a.void / b").toEval();
expect("async / b").toEval();
expect("a.delete / b").toEval();
expect("delete / b/").toEval();
expect("a.in / b").toEval();
expect("for (a in / b/) {}").toEval();
expect("a.instanceof / b").toEval();
expect("a instanceof / b/").toEval();
expect("new / b/").toEval();
expect("null / b").toEval();
expect("for (a of / b/) {}").toEval();
expect("a.return / b").toEval();
expect("function foo() { return / b/ }").toEval();
expect("throw / b/").toEval();
expect("a.typeof / b").toEval();
expect("a.void / b").toEval();
expect("void / b/").toEval();
expect("await / b").toEval();
expect("await / b/").not.toEval();
expect("async function foo() { await / b }").not.toEval();
expect("async function foo() { await / b/ }").toEval();
expect("yield / b").toEval();
expect("yield / b/").not.toEval();
expect("function* foo() { yield / b }").not.toEval();
expect("function* foo() { yield / b/ }").toEval();
});