mirror of
https://github.com/RGBCube/serenity
synced 2025-05-20 13:55:08 +00:00

For example, "property.br\u{64}wn" should resolve to "property.brown". To support this behavior, this commit changes the Token class to hold both the evaluated identifier name and a view into the original source for the unevaluated name. There are some contexts in which identifiers are not allowed to contain Unicode escape sequences; for example, export statements of the form "export {} from foo.js" forbid escapes in the identifier "from". The test file is added to .prettierignore because prettier will replace all escaped Unicode sequences with their unescaped value.
252 lines
8.4 KiB
C++
252 lines
8.4 KiB
C++
/*
|
|
* Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
|
|
* Copyright (c) 2020-2021, Linus Groh <linusg@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include "Token.h"
|
|
#include <AK/Assertions.h>
|
|
#include <AK/CharacterTypes.h>
|
|
#include <AK/GenericLexer.h>
|
|
#include <AK/StringBuilder.h>
|
|
|
|
namespace JS {
|
|
|
|
// Returns the spelling of the given token type's enumerator as a C string.
const char* Token::name(TokenType type)
{
    // ENUMERATE_JS_TOKENS is expected to invoke __ENUMERATE_JS_TOKEN once per token type;
    // each invocation becomes a case label that returns the stringified enumerator name.
#define __ENUMERATE_JS_TOKEN(token_type, token_category) \
    case TokenType::token_type:                          \
        return #token_type;

    switch (type) {
        ENUMERATE_JS_TOKENS
    default:
        // A value outside the enumerated token types is a programming error.
        VERIFY_NOT_REACHED();
        return "<Unknown>";
    }

#undef __ENUMERATE_JS_TOKEN
}
|
|
|
|
const char* Token::name() const
|
|
{
|
|
return name(m_type);
|
|
}
|
|
|
|
// Maps a token type to the category it was declared with in ENUMERATE_JS_TOKENS.
TokenCategory Token::category(TokenType type)
{
    // ENUMERATE_JS_TOKENS is expected to invoke __ENUMERATE_JS_TOKEN once per token type;
    // each invocation becomes a case label that returns the category paired with that type.
#define __ENUMERATE_JS_TOKEN(token_type, token_category) \
    case TokenType::token_type:                          \
        return TokenCategory::token_category;

    switch (type) {
        ENUMERATE_JS_TOKENS
    default:
        // A value outside the enumerated token types is a programming error.
        VERIFY_NOT_REACHED();
    }

#undef __ENUMERATE_JS_TOKEN
}
|
|
|
|
// Returns the category of this token's own type.
TokenCategory Token::category() const
{
    return Token::category(type());
}
|
|
|
|
double Token::double_value() const
|
|
{
|
|
VERIFY(type() == TokenType::NumericLiteral);
|
|
|
|
StringBuilder builder;
|
|
|
|
for (auto ch : value()) {
|
|
if (ch == '_')
|
|
continue;
|
|
builder.append(ch);
|
|
}
|
|
|
|
String value_string = builder.to_string();
|
|
if (value_string[0] == '0' && value_string.length() >= 2) {
|
|
if (value_string[1] == 'x' || value_string[1] == 'X') {
|
|
// hexadecimal
|
|
return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 16));
|
|
} else if (value_string[1] == 'o' || value_string[1] == 'O') {
|
|
// octal
|
|
return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 8));
|
|
} else if (value_string[1] == 'b' || value_string[1] == 'B') {
|
|
// binary
|
|
return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 2));
|
|
} else if (is_ascii_digit(value_string[1])) {
|
|
// also octal, but syntax error in strict mode
|
|
if (!value().contains('8') && !value().contains('9'))
|
|
return static_cast<double>(strtoul(value_string.characters() + 1, nullptr, 8));
|
|
}
|
|
}
|
|
return strtod(value_string.characters(), nullptr);
|
|
}
|
|
|
|
// Returns the numeric value (0-15) of a single ASCII hexadecimal digit, in either case.
// The caller must pass a valid hex digit; anything else trips the VERIFY.
static u32 hex2int(char x)
{
    VERIFY(is_ascii_hex_digit(x));

    bool const is_decimal_digit = x >= '0' && x <= '9';
    if (!is_decimal_digit) {
        // 'a'..'f' / 'A'..'F' map to 10..15.
        return 10u + (to_ascii_lowercase(x) - 'a');
    }
    return x - '0';
}
|
|
|
|
// Evaluates the escape sequences of a StringLiteral or TemplateLiteralString token and
// returns the resulting string.
//
// `status` is an out-parameter and is only written in these cases:
//   - MalformedUnicodeEscape / UnicodeEscapeOverflow / MalformedHexEscape: evaluation
//     failed and an empty (null) String is returned.
//   - LegacyOctalEscapeSequence: evaluation succeeded, but the literal contains a legacy
//     octal escape.
// In all other cases `status` is left untouched, so callers should initialize it themselves.
String Token::string_value(StringValueStatus& status) const
{
    VERIFY(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);

    // For string literals the first and last character (the enclosing quotes) are skipped;
    // template literal strings are used as-is.
    auto is_template = type() == TokenType::TemplateLiteralString;
    GenericLexer lexer(is_template ? value() : value().substring_view(1, value().length() - 2));

    // Records the failure reason and produces the empty result returned to the caller.
    auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
        status = parse_status;
        return {};
    };

    StringBuilder builder;
    while (!lexer.is_eof()) {
        // No escape, consume one char and continue
        if (!lexer.next_is('\\')) {
            builder.append(lexer.consume());
            continue;
        }

        // Unicode escape
        if (lexer.next_is("\\u"sv)) {
            auto code_point_or_error = lexer.consume_escaped_code_point();

            if (code_point_or_error.is_error()) {
                switch (code_point_or_error.error()) {
                case GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape:
                    return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
                case GenericLexer::UnicodeEscapeError::UnicodeEscapeOverflow:
                    return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
                }
            }

            builder.append_code_point(code_point_or_error.value());
            continue;
        }

        // Skip the backslash; something must follow it.
        lexer.ignore();
        VERIFY(!lexer.is_eof());

        // Line continuation
        if (lexer.next_is('\n') || lexer.next_is('\r')) {
            // <CR><LF> forms a single LineTerminatorSequence, so both characters belong to
            // the continuation; otherwise the <LF> would leak into the resulting string.
            if (lexer.next_is("\r\n"sv))
                lexer.ignore();
            lexer.ignore();
            continue;
        }
        // Line continuation (LINE SEPARATOR / PARAGRAPH SEPARATOR, three bytes each in UTF-8)
        if (lexer.next_is(LINE_SEPARATOR_STRING) || lexer.next_is(PARAGRAPH_SEPARATOR_STRING)) {
            lexer.ignore(3);
            continue;
        }
        // Null-byte escape
        if (lexer.next_is('0') && !is_ascii_digit(lexer.peek(1))) {
            lexer.ignore();
            builder.append('\0');
            continue;
        }
        // Hex escape
        if (lexer.next_is('x')) {
            lexer.ignore();
            // Exactly two hex digits are required after "\x".
            if (!is_ascii_hex_digit(lexer.peek()) || !is_ascii_hex_digit(lexer.peek(1)))
                return encoding_failure(StringValueStatus::MalformedHexEscape);
            auto code_point = hex2int(lexer.consume()) * 16 + hex2int(lexer.consume());
            VERIFY(code_point <= 255);
            builder.append_code_point(code_point);
            continue;
        }

        // In non-strict mode LegacyOctalEscapeSequence is allowed in strings:
        // https://tc39.es/ecma262/#sec-additional-syntax-string-literals
        String octal_str;

        auto is_octal_digit = [](char ch) { return ch >= '0' && ch <= '7'; };
        auto is_zero_to_three = [](char ch) { return ch >= '0' && ch <= '3'; };
        auto is_four_to_seven = [](char ch) { return ch >= '4' && ch <= '7'; };

        // OctalDigit [lookahead ∉ OctalDigit]
        if (is_octal_digit(lexer.peek()) && !is_octal_digit(lexer.peek(1)))
            octal_str = lexer.consume(1);
        // ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
        else if (is_zero_to_three(lexer.peek()) && is_octal_digit(lexer.peek(1)) && !is_octal_digit(lexer.peek(2)))
            octal_str = lexer.consume(2);
        // FourToSeven OctalDigit
        else if (is_four_to_seven(lexer.peek()) && is_octal_digit(lexer.peek(1)))
            octal_str = lexer.consume(2);
        // ZeroToThree OctalDigit OctalDigit
        else if (is_zero_to_three(lexer.peek()) && is_octal_digit(lexer.peek(1)) && is_octal_digit(lexer.peek(2)))
            octal_str = lexer.consume(3);

        if (!octal_str.is_null()) {
            // Report the legacy escape to the caller, but keep evaluating the string.
            status = StringValueStatus::LegacyOctalEscapeSequence;
            auto code_point = strtoul(octal_str.characters(), nullptr, 8);
            VERIFY(code_point <= 255);
            builder.append_code_point(code_point);
            continue;
        }

        // Anything else: step back onto the backslash and let the lexer evaluate the
        // escaped character using the given escape map.
        lexer.retreat();
        builder.append(lexer.consume_escaped_character('\\', "b\bf\fn\nr\rt\tv\v"));
    }
    return builder.to_string();
}
|
|
|
|
bool Token::bool_value() const
|
|
{
|
|
VERIFY(type() == TokenType::BoolLiteral);
|
|
return value() == "true";
|
|
}
|
|
|
|
bool Token::is_identifier_name() const
|
|
{
|
|
// IdentifierNames are Identifiers + ReservedWords
|
|
// The standard defines this reversed: Identifiers are IdentifierNames except reserved words
|
|
// https://tc39.es/ecma262/#prod-Identifier
|
|
return m_type == TokenType::Identifier
|
|
|| m_type == TokenType::Await
|
|
|| m_type == TokenType::BoolLiteral
|
|
|| m_type == TokenType::Break
|
|
|| m_type == TokenType::Case
|
|
|| m_type == TokenType::Catch
|
|
|| m_type == TokenType::Class
|
|
|| m_type == TokenType::Const
|
|
|| m_type == TokenType::Continue
|
|
|| m_type == TokenType::Debugger
|
|
|| m_type == TokenType::Default
|
|
|| m_type == TokenType::Delete
|
|
|| m_type == TokenType::Do
|
|
|| m_type == TokenType::Else
|
|
|| m_type == TokenType::Enum
|
|
|| m_type == TokenType::Export
|
|
|| m_type == TokenType::Extends
|
|
|| m_type == TokenType::Finally
|
|
|| m_type == TokenType::For
|
|
|| m_type == TokenType::Function
|
|
|| m_type == TokenType::If
|
|
|| m_type == TokenType::Import
|
|
|| m_type == TokenType::In
|
|
|| m_type == TokenType::Instanceof
|
|
|| m_type == TokenType::Let
|
|
|| m_type == TokenType::New
|
|
|| m_type == TokenType::NullLiteral
|
|
|| m_type == TokenType::Return
|
|
|| m_type == TokenType::Super
|
|
|| m_type == TokenType::Switch
|
|
|| m_type == TokenType::This
|
|
|| m_type == TokenType::Throw
|
|
|| m_type == TokenType::Try
|
|
|| m_type == TokenType::Typeof
|
|
|| m_type == TokenType::Var
|
|
|| m_type == TokenType::Void
|
|
|| m_type == TokenType::While
|
|
|| m_type == TokenType::With
|
|
|| m_type == TokenType::Yield;
|
|
}
|
|
|
|
bool Token::trivia_contains_line_terminator() const
|
|
{
|
|
return m_trivia.contains('\n') || m_trivia.contains('\r') || m_trivia.contains(LINE_SEPARATOR_STRING) || m_trivia.contains(PARAGRAPH_SEPARATOR_STRING);
|
|
}
|
|
|
|
}
|