1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 13:37:44 +00:00

LibJS: Correctly handle Unicode characters in JS source text

Also recognize additional white space characters.
This commit is contained in:
davidot 2021-08-14 17:07:47 +02:00 committed by Linus Groh
parent 4d6502de42
commit 47bc72bcf6
6 changed files with 100 additions and 16 deletions

View file

@@ -13,11 +13,22 @@ namespace JS {
// Named constants for individual Unicode characters: raw UTF-8 byte sequences,
// StringView wrappers over those bytes, and numeric code point values.
// NOTE(review): this span looks like a diff hunk whose +/- markers were lost, so
// pre-change and post-change lines may be interleaved. LINE_SEPARATOR and
// PARAGRAPH_SEPARATOR are each declared twice below with different types
// (StringView and u32), which cannot both stand in one translation unit —
// confirm which declarations are current before relying on this text.

// U+2028 LINE SEPARATOR
// UTF-8 encoding of U+2028 (E2 80 A8), NUL-terminated.
constexpr const char line_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa8, 0 };
// StringView over the UTF-8 bytes above (presumably the pre-change name — TODO confirm).
constexpr const StringView LINE_SEPARATOR { line_separator_chars };
// StringView over the same UTF-8 bytes, under a name that does not clash with the u32 constant.
constexpr const StringView LINE_SEPARATOR_STRING { line_separator_chars };
// Code point value of U+2028.
constexpr const u32 LINE_SEPARATOR { 0x2028 };
// U+2029 PARAGRAPH SEPARATOR
// UTF-8 encoding of U+2029 (E2 80 A9), NUL-terminated.
constexpr const char paragraph_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa9, 0 };
// StringView over the UTF-8 bytes above (presumably the pre-change name — TODO confirm).
constexpr const StringView PARAGRAPH_SEPARATOR { paragraph_separator_chars };
// StringView over the same UTF-8 bytes, under a name that does not clash with the u32 constant.
constexpr const StringView PARAGRAPH_SEPARATOR_STRING { paragraph_separator_chars };
// Code point value of U+2029.
constexpr const u32 PARAGRAPH_SEPARATOR { 0x2029 };
// U+00A0 NO-BREAK SPACE
constexpr const u32 NO_BREAK_SPACE { 0x00A0 };
// U+200C ZERO WIDTH NON-JOINER
constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
// U+200D ZERO WIDTH JOINER
constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
#define ENUMERATE_JS_TOKENS \
__ENUMERATE_JS_TOKEN(Ampersand, Operator) \