mirror of
https://github.com/RGBCube/serenity
synced 2025-05-30 14:38:12 +00:00

For example, "property.br\u{64}wn" should resolve to "property.brown". To support this behavior, this commit changes the Token class to hold both the evaluated identifier name and a view into the original source for the unevaluated name. There are some contexts in which identifiers are not allowed to contain Unicode escape sequences; for example, export statements of the form "export {} from foo.js" forbid escapes in the identifier "from". The test file is added to .prettierignore because prettier will replace all escaped Unicode sequences with their unescaped value.
252 lines
12 KiB
C++
252 lines
12 KiB
C++
/*
|
|
* Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <AK/FlyString.h>
|
|
#include <AK/String.h>
|
|
#include <AK/StringView.h>
|
|
#include <AK/Variant.h>
|
|
|
|
namespace JS {
|
|
|
|
// U+2028 LINE SEPARATOR
|
|
constexpr const char line_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa8, 0 };
|
|
constexpr const StringView LINE_SEPARATOR_STRING { line_separator_chars };
|
|
constexpr const u32 LINE_SEPARATOR { 0x2028 };
|
|
|
|
// U+2029 PARAGRAPH SEPARATOR
|
|
constexpr const char paragraph_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa9, 0 };
|
|
constexpr const StringView PARAGRAPH_SEPARATOR_STRING { paragraph_separator_chars };
|
|
constexpr const u32 PARAGRAPH_SEPARATOR { 0x2029 };
|
|
|
|
// U+00A0 NO BREAK SPACE
|
|
constexpr const u32 NO_BREAK_SPACE { 0x00A0 };
|
|
|
|
// U+200C ZERO WIDTH NON-JOINER
|
|
constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
|
|
|
|
// U+200D ZERO WIDTH JOINER
|
|
constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
|
|
|
|
#define ENUMERATE_JS_TOKENS \
|
|
__ENUMERATE_JS_TOKEN(Ampersand, Operator) \
|
|
__ENUMERATE_JS_TOKEN(AmpersandEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Arrow, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Asterisk, Operator) \
|
|
__ENUMERATE_JS_TOKEN(AsteriskEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Async, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Await, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(BigIntLiteral, Number) \
|
|
__ENUMERATE_JS_TOKEN(BoolLiteral, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(BracketClose, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(BracketOpen, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(Break, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Caret, Operator) \
|
|
__ENUMERATE_JS_TOKEN(CaretEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Case, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Catch, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Class, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Colon, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(Comma, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(Const, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Continue, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(CurlyClose, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(CurlyOpen, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(Debugger, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Default, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Delete, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Do, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(DoubleAmpersand, Operator) \
|
|
__ENUMERATE_JS_TOKEN(DoubleAmpersandEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(DoubleAsterisk, Operator) \
|
|
__ENUMERATE_JS_TOKEN(DoubleAsteriskEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(DoublePipe, Operator) \
|
|
__ENUMERATE_JS_TOKEN(DoublePipeEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(DoubleQuestionMark, Operator) \
|
|
__ENUMERATE_JS_TOKEN(DoubleQuestionMarkEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Else, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Enum, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Eof, Invalid) \
|
|
__ENUMERATE_JS_TOKEN(Equals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(EqualsEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(EqualsEqualsEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(ExclamationMark, Operator) \
|
|
__ENUMERATE_JS_TOKEN(ExclamationMarkEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(ExclamationMarkEqualsEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Export, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Extends, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Finally, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(For, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Function, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(GreaterThan, Operator) \
|
|
__ENUMERATE_JS_TOKEN(GreaterThanEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Identifier, Identifier) \
|
|
__ENUMERATE_JS_TOKEN(If, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Implements, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Import, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(In, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Instanceof, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Interface, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Invalid, Invalid) \
|
|
__ENUMERATE_JS_TOKEN(LessThan, Operator) \
|
|
__ENUMERATE_JS_TOKEN(LessThanEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Let, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Minus, Operator) \
|
|
__ENUMERATE_JS_TOKEN(MinusEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(MinusMinus, Operator) \
|
|
__ENUMERATE_JS_TOKEN(New, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(NullLiteral, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(NumericLiteral, Number) \
|
|
__ENUMERATE_JS_TOKEN(Package, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(ParenClose, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(ParenOpen, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(Percent, Operator) \
|
|
__ENUMERATE_JS_TOKEN(PercentEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Period, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Pipe, Operator) \
|
|
__ENUMERATE_JS_TOKEN(PipeEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Plus, Operator) \
|
|
__ENUMERATE_JS_TOKEN(PlusEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(PlusPlus, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Private, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Protected, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Public, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(QuestionMark, Operator) \
|
|
__ENUMERATE_JS_TOKEN(QuestionMarkPeriod, Operator) \
|
|
__ENUMERATE_JS_TOKEN(RegexFlags, String) \
|
|
__ENUMERATE_JS_TOKEN(RegexLiteral, String) \
|
|
__ENUMERATE_JS_TOKEN(Return, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Semicolon, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(ShiftLeft, Operator) \
|
|
__ENUMERATE_JS_TOKEN(ShiftLeftEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(ShiftRight, Operator) \
|
|
__ENUMERATE_JS_TOKEN(ShiftRightEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Slash, Operator) \
|
|
__ENUMERATE_JS_TOKEN(SlashEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Static, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(StringLiteral, String) \
|
|
__ENUMERATE_JS_TOKEN(Super, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Switch, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(TemplateLiteralEnd, String) \
|
|
__ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(TemplateLiteralExprStart, Punctuation) \
|
|
__ENUMERATE_JS_TOKEN(TemplateLiteralStart, String) \
|
|
__ENUMERATE_JS_TOKEN(TemplateLiteralString, String) \
|
|
__ENUMERATE_JS_TOKEN(This, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Throw, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Tilde, Operator) \
|
|
__ENUMERATE_JS_TOKEN(TripleDot, Operator) \
|
|
__ENUMERATE_JS_TOKEN(Try, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Typeof, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(UnsignedShiftRight, Operator) \
|
|
__ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals, Operator) \
|
|
__ENUMERATE_JS_TOKEN(UnterminatedRegexLiteral, String) \
|
|
__ENUMERATE_JS_TOKEN(UnterminatedStringLiteral, String) \
|
|
__ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral, String) \
|
|
__ENUMERATE_JS_TOKEN(Var, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(Void, Keyword) \
|
|
__ENUMERATE_JS_TOKEN(While, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(With, ControlKeyword) \
|
|
__ENUMERATE_JS_TOKEN(Yield, ControlKeyword)
|
|
|
|
enum class TokenType {
|
|
#define __ENUMERATE_JS_TOKEN(type, category) type,
|
|
ENUMERATE_JS_TOKENS
|
|
#undef __ENUMERATE_JS_TOKEN
|
|
_COUNT_OF_TOKENS
|
|
};
|
|
constexpr size_t cs_num_of_js_tokens = static_cast<size_t>(TokenType::_COUNT_OF_TOKENS);
|
|
|
|
enum class TokenCategory {
|
|
Invalid,
|
|
Number,
|
|
String,
|
|
Punctuation,
|
|
Operator,
|
|
Keyword,
|
|
ControlKeyword,
|
|
Identifier
|
|
};
|
|
|
|
class Token {
|
|
public:
|
|
Token() = default;
|
|
|
|
Token(TokenType type, String message, StringView trivia, StringView value, StringView filename, size_t line_number, size_t line_column, size_t offset)
|
|
: m_type(type)
|
|
, m_message(message)
|
|
, m_trivia(trivia)
|
|
, m_original_value(value)
|
|
, m_value(value)
|
|
, m_filename(filename)
|
|
, m_line_number(line_number)
|
|
, m_line_column(line_column)
|
|
, m_offset(offset)
|
|
{
|
|
}
|
|
|
|
Token(TokenType type, String message, StringView trivia, StringView original_value, FlyString value, StringView filename, size_t line_number, size_t line_column, size_t offset)
|
|
: m_type(type)
|
|
, m_message(message)
|
|
, m_trivia(trivia)
|
|
, m_original_value(original_value)
|
|
, m_value(move(value))
|
|
, m_filename(filename)
|
|
, m_line_number(line_number)
|
|
, m_line_column(line_column)
|
|
, m_offset(offset)
|
|
{
|
|
}
|
|
|
|
TokenType type() const { return m_type; }
|
|
TokenCategory category() const;
|
|
static TokenCategory category(TokenType);
|
|
const char* name() const;
|
|
static const char* name(TokenType);
|
|
|
|
const String& message() const { return m_message; }
|
|
const StringView& trivia() const { return m_trivia; }
|
|
const StringView& original_value() const { return m_original_value; }
|
|
StringView value() const
|
|
{
|
|
return m_value.visit(
|
|
[](StringView const& view) { return view; },
|
|
[](FlyString const& identifier) { return identifier.view(); },
|
|
[](Empty) -> StringView { VERIFY_NOT_REACHED(); });
|
|
}
|
|
const StringView& filename() const { return m_filename; }
|
|
size_t line_number() const { return m_line_number; }
|
|
size_t line_column() const { return m_line_column; }
|
|
size_t offset() const { return m_offset; }
|
|
double double_value() const;
|
|
bool bool_value() const;
|
|
|
|
enum class StringValueStatus {
|
|
Ok,
|
|
MalformedHexEscape,
|
|
MalformedUnicodeEscape,
|
|
UnicodeEscapeOverflow,
|
|
LegacyOctalEscapeSequence,
|
|
};
|
|
String string_value(StringValueStatus& status) const;
|
|
|
|
bool is_identifier_name() const;
|
|
bool trivia_contains_line_terminator() const;
|
|
|
|
private:
|
|
TokenType m_type { TokenType::Invalid };
|
|
String m_message;
|
|
StringView m_trivia;
|
|
StringView m_original_value;
|
|
Variant<Empty, StringView, FlyString> m_value { Empty {} };
|
|
StringView m_filename;
|
|
size_t m_line_number { 0 };
|
|
size_t m_line_column { 0 };
|
|
size_t m_offset { 0 };
|
|
};
|
|
|
|
}
|