1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 06:17:35 +00:00

LibJS: Allow Unicode escape sequences in identifiers

For example, "property.br\u{6f}wn" should resolve to "property.brown".

To support this behavior, this commit changes the Token class to hold
both the evaluated identifier name and a view into the original source
for the unevaluated name. There are some contexts in which identifiers
are not allowed to contain Unicode escape sequences; for example, export
statements of the form "export {} from foo.js" forbid escapes in the
identifier "from".

The test file is added to .prettierignore because prettier will replace
all escaped Unicode sequences with their unescaped value.
This commit is contained in:
Timothy Flynn 2021-08-18 16:34:25 -04:00 committed by Andreas Kling
parent c5b5c779ff
commit 1259dc3623
7 changed files with 163 additions and 54 deletions

View file

@ -41,8 +41,9 @@ private:
bool is_eof() const;
bool is_line_terminator() const;
bool is_whitespace() const;
bool is_identifier_start() const;
bool is_identifier_middle() const;
Optional<u32> is_unicode_escape(size_t& identifier_length) const;
Optional<u32> is_identifier_start(size_t& identifier_length) const;
Optional<u32> is_identifier_middle(size_t& identifier_length) const;
bool is_line_comment_start(bool line_has_token_yet) const;
bool is_block_comment_start() const;
bool is_block_comment_end() const;
@ -80,6 +81,10 @@ private:
static HashMap<String, TokenType> s_three_char_tokens;
static HashMap<String, TokenType> s_two_char_tokens;
static HashMap<char, TokenType> s_single_char_tokens;
// Resolved identifiers must be kept alive for the duration of the parsing stage, otherwise
// the only references to these strings are deleted by the Token destructor.
Vector<FlyString> m_parsed_identifiers;
};
}