LibJS+LibCrypto: Allow '_' as a numeric literal separator :^)

This patch adds support for the NumericLiteralSeparator concept from the ECMAScript grammar.
2025-07-26 07:17:35 +00:00 · 2021-06-26 16:30:05 +02:00 · 2021-06-26 16:30:05 +02:00 · 49018553d3
commit 49018553d3
parent 527c639c1f
4 changed files with 61 additions and 15 deletions
--- a/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp
+++ b/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp
@ -72,6 +72,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base10(const String& str)
    UnsignedBigInteger ten { 10 };
    for (auto& c : str) {
        if (c == '_')
            continue;
        result = result.multiplied_by(ten).plus(parse_ascii_digit(c));
    }
    return result;
@ -83,6 +85,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base2(const String& str)
    UnsignedBigInteger two { 2 };
    for (auto& c : str) {
        if (c == '_')
            continue;
        result = result.multiplied_by(two).plus(parse_ascii_digit(c));
    }
    return result;
@ -94,6 +98,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base8(const String& str)
    UnsignedBigInteger eight { 8 };
    for (auto& c : str) {
        if (c == '_')
            continue;
        result = result.multiplied_by(eight).plus(parse_ascii_digit(c));
    }
    return result;
@ -105,6 +111,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base16(const String& str)
    UnsignedBigInteger sixteen { 16 };
    for (auto& c : str) {
        if (c == '_')
            continue;
        result = result.multiplied_by(sixteen).plus(parse_ascii_hex_digit(c));
    }
    return result;
--- a/Userland/Libraries/LibJS/Lexer.cpp
+++ b/Userland/Libraries/LibJS/Lexer.cpp
@ -193,6 +193,17 @@ void Lexer::consume()
    m_current_char = m_source[m_position++];
 }
 // Consumes a run of decimal digits starting at the current character.
 // A '_' is consumed as part of the run only when
 // match_numeric_literal_separator_followed_by(is_ascii_digit) accepts it.
 // Returns false (consuming nothing) when the current character is not an
 // ASCII digit; returns true once the run has been consumed.
 bool Lexer::consume_decimal_number()
 {
    if (!is_ascii_digit(m_current_char))
        return false;

    // The guard above guarantees the first character is a digit, so it is
    // consumed unconditionally before the continuation test runs.
    do {
        consume();
    } while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit));

    return true;
 }
 bool Lexer::consume_exponent()
 {
    consume();
@ -202,21 +213,22 @@ bool Lexer::consume_exponent()
    if (!is_ascii_digit(m_current_char))
        return false;
-    while (is_ascii_digit(m_current_char)) {
+    return consume_decimal_number();
-        consume();
+}
-    }
+
-    return true;
+static constexpr bool is_octal_digit(char ch)
 {
    return ch >= '0' && ch <= '7';
 }
 bool Lexer::consume_octal_number()
 {
    consume();
-    if (!(m_current_char >= '0' && m_current_char <= '7'))
+    if (!is_octal_digit(m_current_char))
        return false;
-    while (m_current_char >= '0' && m_current_char <= '7') {
+    while (is_octal_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_octal_digit))
        consume();
    }
    return true;
 }
@ -227,24 +239,38 @@ bool Lexer::consume_hexadecimal_number()
    if (!is_ascii_hex_digit(m_current_char))
        return false;
-    while (is_ascii_hex_digit(m_current_char))
+    while (is_ascii_hex_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_hex_digit))
        consume();
    return true;
 }
 // Reports whether ch is one of the two binary digit characters, '0' or '1'.
 static constexpr bool is_binary_digit(char ch)
 {
    switch (ch) {
    case '0':
    case '1':
        return true;
    default:
        return false;
    }
 }
 bool Lexer::consume_binary_number()
 {
    consume();
-    if (!(m_current_char == '0' || m_current_char == '1'))
+    if (!is_binary_digit(m_current_char))
        return false;
-    while (m_current_char == '0' || m_current_char == '1')
+    while (is_binary_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_binary_digit))
        consume();
    return true;
 }
 // Returns true when the current character is the numeric literal separator
 // '_' and the character at m_source[m_position] satisfies `callback`.
 // Returns false when the current character is not '_' or when m_position is
 // at or beyond the end of m_source. `callback` is only invoked when both
 // of those checks pass.
 template<typename Callback>
 bool Lexer::match_numeric_literal_separator_followed_by(Callback callback) const
 {
    if (m_current_char != '_')
        return false;

    // Bounds check before indexing into m_source.
    const bool has_following_char = m_position < m_source.length();
    return has_following_char && callback(m_source[m_position]);
 }
 bool Lexer::match(char a, char b) const
 {
    if (m_position >= m_source.length())
@ -460,7 +486,7 @@ Token Lexer::next()
            if (m_current_char == '.') {
                // decimal
                consume();
-                while (is_ascii_digit(m_current_char))
+                while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit))
                    consume();
                if (m_current_char == 'e' || m_current_char == 'E')
                    is_invalid_numeric_literal = !consume_exponent();
@ -494,11 +520,11 @@ Token Lexer::next()
                // octal without '0o' prefix. Forbidden in 'strict mode'
                do {
                    consume();
-                } while (is_ascii_digit(m_current_char));
+                } while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit));
            }
        } else {
            // 1...9 or period
-            while (is_ascii_digit(m_current_char))
+            while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit))
                consume();
            if (m_current_char == 'n') {
                consume();
@ -506,7 +532,7 @@ Token Lexer::next()
            } else {
                if (m_current_char == '.') {
                    consume();
-                    while (is_ascii_digit(m_current_char))
+                    while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit))
                        consume();
                }
                if (m_current_char == 'e' || m_current_char == 'E')
--- a/Userland/Libraries/LibJS/Lexer.h
+++ b/Userland/Libraries/LibJS/Lexer.h
@ -29,6 +29,7 @@ private:
    bool consume_octal_number();
    bool consume_hexadecimal_number();
    bool consume_binary_number();
    bool consume_decimal_number();
    bool is_eof() const;
    bool is_line_terminator() const;
    bool is_identifier_start() const;
@ -40,6 +41,8 @@ private:
    bool match(char, char) const;
    bool match(char, char, char) const;
    bool match(char, char, char, char) const;
    template<typename Callback>
    bool match_numeric_literal_separator_followed_by(Callback) const;
    bool slash_means_division() const;
    StringView m_source;
--- a/Userland/Libraries/LibJS/Token.cpp
+++ b/Userland/Libraries/LibJS/Token.cpp
@ -53,7 +53,16 @@ TokenCategory Token::category() const
 double Token::double_value() const
 {
    VERIFY(type() == TokenType::NumericLiteral);
-    String value_string(m_value);
+
    StringBuilder builder;
    for (auto ch : m_value) {
        if (ch == '_')
            continue;
        builder.append(ch);
    }
    String value_string = builder.to_string();
    if (value_string[0] == '0' && value_string.length() >= 2) {
        if (value_string[1] == 'x' || value_string[1] == 'X') {
            // hexadecimal