From 49018553d332280bc1f15ee102ad8235d66ce76d Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Sat, 26 Jun 2021 16:30:05 +0200 Subject: [PATCH] LibJS+LibCrypto: Allow '_' as a numeric literal separator :^) This patch adds support for the NumericLiteralSeparator concept from the ECMAScript grammar. --- .../LibCrypto/BigInt/UnsignedBigInteger.cpp | 8 +++ Userland/Libraries/LibJS/Lexer.cpp | 54 ++++++++++++++----- Userland/Libraries/LibJS/Lexer.h | 3 ++ Userland/Libraries/LibJS/Token.cpp | 11 +++- 4 files changed, 61 insertions(+), 15 deletions(-) diff --git a/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp b/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp index 47d19333ff..61dd2f0321 100644 --- a/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp +++ b/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp @@ -72,6 +72,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base10(const String& str) UnsignedBigInteger ten { 10 }; for (auto& c : str) { + if (c == '_') + continue; result = result.multiplied_by(ten).plus(parse_ascii_digit(c)); } return result; @@ -83,6 +85,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base2(const String& str) UnsignedBigInteger two { 2 }; for (auto& c : str) { + if (c == '_') + continue; result = result.multiplied_by(two).plus(parse_ascii_digit(c)); } return result; @@ -94,6 +98,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base8(const String& str) UnsignedBigInteger eight { 8 }; for (auto& c : str) { + if (c == '_') + continue; result = result.multiplied_by(eight).plus(parse_ascii_digit(c)); } return result; @@ -105,6 +111,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base16(const String& str) UnsignedBigInteger sixteen { 16 }; for (auto& c : str) { + if (c == '_') + continue; result = result.multiplied_by(sixteen).plus(parse_ascii_hex_digit(c)); } return result; diff --git a/Userland/Libraries/LibJS/Lexer.cpp b/Userland/Libraries/LibJS/Lexer.cpp index 910d786483..5d39c5431e 100644 --- 
a/Userland/Libraries/LibJS/Lexer.cpp +++ b/Userland/Libraries/LibJS/Lexer.cpp @@ -193,6 +193,17 @@ void Lexer::consume() m_current_char = m_source[m_position++]; } +bool Lexer::consume_decimal_number() +{ + if (!is_ascii_digit(m_current_char)) + return false; + + while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit)) { + consume(); + } + return true; +} + bool Lexer::consume_exponent() { consume(); @@ -202,21 +213,22 @@ bool Lexer::consume_exponent() if (!is_ascii_digit(m_current_char)) return false; - while (is_ascii_digit(m_current_char)) { - consume(); - } - return true; + return consume_decimal_number(); +} + +static constexpr bool is_octal_digit(char ch) +{ + return ch >= '0' && ch <= '7'; } bool Lexer::consume_octal_number() { consume(); - if (!(m_current_char >= '0' && m_current_char <= '7')) + if (!is_octal_digit(m_current_char)) return false; - while (m_current_char >= '0' && m_current_char <= '7') { + while (is_octal_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_octal_digit)) consume(); - } return true; } @@ -227,24 +239,38 @@ bool Lexer::consume_hexadecimal_number() if (!is_ascii_hex_digit(m_current_char)) return false; - while (is_ascii_hex_digit(m_current_char)) + while (is_ascii_hex_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_hex_digit)) consume(); return true; } +static constexpr bool is_binary_digit(char ch) +{ + return ch == '0' || ch == '1'; +} + bool Lexer::consume_binary_number() { consume(); - if (!(m_current_char == '0' || m_current_char == '1')) + if (!is_binary_digit(m_current_char)) return false; - while (m_current_char == '0' || m_current_char == '1') + while (is_binary_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_binary_digit)) consume(); return true; } +template<typename Callback> +bool Lexer::match_numeric_literal_separator_followed_by(Callback callback) const +{ + if (m_position >= m_source.length()) + return false; + 
return m_current_char == '_' + && callback(m_source[m_position]); +} + bool Lexer::match(char a, char b) const { if (m_position >= m_source.length()) @@ -460,7 +486,7 @@ Token Lexer::next() if (m_current_char == '.') { // decimal consume(); - while (is_ascii_digit(m_current_char)) + while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit)) consume(); if (m_current_char == 'e' || m_current_char == 'E') is_invalid_numeric_literal = !consume_exponent(); @@ -494,11 +520,11 @@ Token Lexer::next() // octal without '0o' prefix. Forbidden in 'strict mode' do { consume(); - } while (is_ascii_digit(m_current_char)); + } while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit)); } } else { // 1...9 or period - while (is_ascii_digit(m_current_char)) + while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit)) consume(); if (m_current_char == 'n') { consume(); @@ -506,7 +532,7 @@ Token Lexer::next() } else { if (m_current_char == '.') { consume(); - while (is_ascii_digit(m_current_char)) + while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit)) consume(); } if (m_current_char == 'e' || m_current_char == 'E') diff --git a/Userland/Libraries/LibJS/Lexer.h b/Userland/Libraries/LibJS/Lexer.h index 616014499a..a4d1613c42 100644 --- a/Userland/Libraries/LibJS/Lexer.h +++ b/Userland/Libraries/LibJS/Lexer.h @@ -29,6 +29,7 @@ private: bool consume_octal_number(); bool consume_hexadecimal_number(); bool consume_binary_number(); + bool consume_decimal_number(); bool is_eof() const; bool is_line_terminator() const; bool is_identifier_start() const; @@ -40,6 +41,8 @@ private: bool match(char, char) const; bool match(char, char, char) const; bool match(char, char, char, char) const; + template<typename Callback> + bool match_numeric_literal_separator_followed_by(Callback) const; bool slash_means_division() const; StringView m_source; 
diff --git a/Userland/Libraries/LibJS/Token.cpp b/Userland/Libraries/LibJS/Token.cpp index 177fac6d61..6f2b0ebde0 100644 --- a/Userland/Libraries/LibJS/Token.cpp +++ b/Userland/Libraries/LibJS/Token.cpp @@ -53,7 +53,16 @@ TokenCategory Token::category() const double Token::double_value() const { VERIFY(type() == TokenType::NumericLiteral); - String value_string(m_value); + + StringBuilder builder; + + for (auto ch : m_value) { + if (ch == '_') + continue; + builder.append(ch); + } + + String value_string = builder.to_string(); if (value_string[0] == '0' && value_string.length() >= 2) { if (value_string[1] == 'x' || value_string[1] == 'X') { // hexadecimal