diff --git a/Libraries/LibJS/Parser.cpp b/Libraries/LibJS/Parser.cpp index 806d4aa73b..d8ab3f7a40 100644 --- a/Libraries/LibJS/Parser.cpp +++ b/Libraries/LibJS/Parser.cpp @@ -836,23 +836,41 @@ NonnullRefPtr Parser::parse_array_expression() return create_ast_node(move(elements)); } -NonnullRefPtr Parser::parse_string_literal(Token token) +NonnullRefPtr Parser::parse_string_literal(Token token, bool in_template_literal) { auto status = Token::StringValueStatus::Ok; auto string = token.string_value(status); if (status != Token::StringValueStatus::Ok) { String message; - if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) { + if (status == Token::StringValueStatus::LegacyOctalEscapeSequence) { + m_parser_state.m_string_legacy_octal_escape_sequence_in_scope = true; + if (in_template_literal) + message = "Octal escape sequence not allowed in template literal"; + else if (m_parser_state.m_strict_mode) + message = "Octal escape sequence in string literal not allowed in strict mode"; + } else if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) { auto type = status == Token::StringValueStatus::MalformedUnicodeEscape ? "unicode" : "hexadecimal"; message = String::formatted("Malformed {} escape sequence", type); } else if (status == Token::StringValueStatus::UnicodeEscapeOverflow) { message = "Unicode code_point must not be greater than 0x10ffff in escape sequence"; + } else { + ASSERT_NOT_REACHED(); } if (!message.is_empty()) syntax_error(message, token.line_number(), token.line_column()); } + // It is possible for string literals to precede a Use Strict Directive that places the + // enclosing code in strict mode, and implementations must take care to not use this + // extended definition of EscapeSequence with such literals. 
For example, attempting to + // parse the following source text must fail: + // + // function invalid() { "\7"; "use strict"; } + + if (m_parser_state.m_string_legacy_octal_escape_sequence_in_scope && string == "use strict") + syntax_error("Octal escape sequence in string literal not allowed in strict mode"); + if (m_parser_state.m_use_strict_directive == UseStrictDirectiveState::Looking) { if (string == "use strict" && token.type() != TokenType::TemplateLiteralString) { m_parser_state.m_use_strict_directive = UseStrictDirectiveState::Found; @@ -884,7 +902,7 @@ NonnullRefPtr Parser::parse_template_literal(bool is_tagged) while (!done() && !match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) { if (match(TokenType::TemplateLiteralString)) { auto token = consume(); - expressions.append(parse_string_literal(token)); + expressions.append(parse_string_literal(token, true)); if (is_tagged) raw_strings.append(create_ast_node(token.value())); } else if (match(TokenType::TemplateLiteralExprStart)) { @@ -1249,6 +1267,7 @@ NonnullRefPtr Parser::parse_block_statement(bool& is_strict) first = false; } m_parser_state.m_strict_mode = initial_strict_mode_state; + m_parser_state.m_string_legacy_octal_escape_sequence_in_scope = false; consume(TokenType::CurlyClose); block->add_variables(m_parser_state.m_let_scopes.last()); block->add_functions(m_parser_state.m_function_scopes.last()); diff --git a/Libraries/LibJS/Parser.h b/Libraries/LibJS/Parser.h index 007e8b6879..565f3ff704 100644 --- a/Libraries/LibJS/Parser.h +++ b/Libraries/LibJS/Parser.h @@ -87,7 +87,7 @@ public: NonnullRefPtr parse_regexp_literal(); NonnullRefPtr parse_object_expression(); NonnullRefPtr parse_array_expression(); - NonnullRefPtr parse_string_literal(Token token); + NonnullRefPtr parse_string_literal(Token token, bool in_template_literal = false); NonnullRefPtr parse_template_literal(bool is_tagged); NonnullRefPtr parse_secondary_expression(NonnullRefPtr, int min_precedence, 
Associativity associate = Associativity::Right); NonnullRefPtr parse_call_expression(NonnullRefPtr); @@ -184,6 +184,7 @@ private: bool m_in_function_context { false }; bool m_in_break_context { false }; bool m_in_continue_context { false }; + bool m_string_legacy_octal_escape_sequence_in_scope { false }; explicit ParserState(Lexer); }; diff --git a/Libraries/LibJS/Tests/string-escapes.js b/Libraries/LibJS/Tests/string-escapes.js index e4abf9ec07..d8c75e9055 100644 --- a/Libraries/LibJS/Tests/string-escapes.js +++ b/Libraries/LibJS/Tests/string-escapes.js @@ -13,3 +13,32 @@ test("unicode escapes", () => { expect(`\u{1f41e}`).toBe("🐞"); expect("\u00ff").toBe(String.fromCharCode(0xff)); }); + +describe("octal escapes", () => { + test("basic functionality", () => { + expect("\1").toBe("\u0001"); + expect("\2").toBe("\u0002"); + expect("\3").toBe("\u0003"); + expect("\4").toBe("\u0004"); + expect("\5").toBe("\u0005"); + expect("\6").toBe("\u0006"); + expect("\7").toBe("\u0007"); + expect("\8").toBe("8"); + expect("\9").toBe("9"); + expect("\128").toBe("\n8"); + expect("\141bc").toBe("abc"); + expect("f\157o\142a\162").toBe("foobar"); + expect("\123\145\162\145\156\151\164\171\117\123").toBe("SerenityOS"); + }); + + test("syntax error in template literal", () => { + expect("`\\123`").not.toEval(); + }); + + test("syntax error in strict mode", () => { + expect("'use strict'; '\\123'").not.toEval(); + expect('"use strict"; "\\123"').not.toEval(); + // Special case, string literal precedes use strict directive + expect("'\\123'; somethingElse; 'use strict'").not.toEval(); + }); +}); diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp index 0921ee9e14..57190ef487 100644 --- a/Libraries/LibJS/Token.cpp +++ b/Libraries/LibJS/Token.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2020, Stephan Unverwerth + * Copyright (c) 2020, Linus Groh * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -103,8 +104,19 @@ String Token::string_value(StringValueStatus& status) const { ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString); auto is_template = type() == TokenType::TemplateLiteralString; + auto offset = is_template ? 0 : 1; - auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1; + size_t i; + + auto lookahead = [&](T fn, size_t distance = 1) -> bool { + if (i + distance >= m_value.length() - offset) + return false; + return fn(m_value[i + distance]); + }; + + auto is_octal_digit = [](char c) { + return c >= '0' && c <= '7'; + }; auto encoding_failure = [&status](StringValueStatus parse_status) -> String { status = parse_status; @@ -112,7 +124,7 @@ String Token::string_value(StringValueStatus& status) const }; StringBuilder builder; - for (size_t i = offset; i < m_value.length() - offset; ++i) { + for (i = offset; i < m_value.length() - offset; ++i) { if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) { i++; switch (m_value[i]) { @@ -134,9 +146,6 @@ String Token::string_value(StringValueStatus& status) const case 'v': builder.append('\v'); break; - case '0': - builder.append((char)0); - break; case '\'': builder.append('\''); break; @@ -200,9 +209,43 @@ String Token::string_value(StringValueStatus& status) const builder.append(m_value[i]); break; } + if (m_value[i] == '0' && !lookahead(isdigit)) { + builder.append((char)0); + break; + } - // FIXME: Also parse octal. Should anything else generate a syntax error? 
- builder.append(m_value[i]); + // In non-strict mode LegacyOctalEscapeSequence is allowed in strings: + // https://tc39.es/ecma262/#sec-additional-syntax-string-literals + String octal_str; + + // OctalDigit [lookahead ∉ OctalDigit] + if (is_octal_digit(m_value[i]) && !lookahead(is_octal_digit)) { + status = StringValueStatus::LegacyOctalEscapeSequence; + octal_str = String(&m_value[i], 1); + } + // ZeroToThree OctalDigit [lookahead ∉ OctalDigit] + else if (m_value[i] >= '0' && m_value[i] <= '3' && lookahead(is_octal_digit) && !lookahead(is_octal_digit, 2)) { + status = StringValueStatus::LegacyOctalEscapeSequence; + octal_str = String(m_value.substring_view(i, 2)); + i++; + } + // FourToSeven OctalDigit + else if (m_value[i] >= '4' && m_value[i] <= '7' && lookahead(is_octal_digit)) { + status = StringValueStatus::LegacyOctalEscapeSequence; + octal_str = String(m_value.substring_view(i, 2)); + i++; + } + // ZeroToThree OctalDigit OctalDigit + else if (m_value[i] >= '0' && m_value[i] <= '3' && lookahead(is_octal_digit) && lookahead(is_octal_digit, 2)) { + status = StringValueStatus::LegacyOctalEscapeSequence; + octal_str = String(m_value.substring_view(i, 3)); + i += 2; + } + + if (!octal_str.is_empty()) // decide per escape: status may still be LegacyOctalEscapeSequence from an earlier escape in this token + builder.append_code_point(strtoul(octal_str.characters(), nullptr, 8)); + else + builder.append(m_value[i]); } } else { builder.append(m_value[i]); diff --git a/Libraries/LibJS/Token.h b/Libraries/LibJS/Token.h index 3b967f9775..b9d4bb0d60 100644 --- a/Libraries/LibJS/Token.h +++ b/Libraries/LibJS/Token.h @@ -208,6 +208,7 @@ public: MalformedHexEscape, MalformedUnicodeEscape, UnicodeEscapeOverflow, + LegacyOctalEscapeSequence, }; String string_value(StringValueStatus& status) const;