LibJS: Add numeric literal parsing for different bases and exponents

2025-09-16 12:36:18 +00:00 · 2020-04-05 14:20:58 +02:00 · 2020-04-05 14:20:58 +02:00 · 500f6d9e3a
commit 500f6d9e3a
parent b82a2239c6
4 changed files with 107 additions and 5 deletions
--- a/Libraries/LibJS/Lexer.cpp
+++ b/Libraries/LibJS/Lexer.cpp
@ -156,6 +156,16 @@ void Lexer::consume()
    m_current_char = m_source[m_position++];
 }

+// Consumes the exponent part of a numeric literal. Callers invoke this while
+// m_current_char is the 'e' / 'E' marker; afterwards the lexer sits on the
+// first character that is not part of the exponent.
+void Lexer::consume_exponent()
+{
+    // Step over the exponent marker itself.
+    consume();
+
+    // An optional sign may precede the exponent digits.
+    const bool has_sign = m_current_char == '+' || m_current_char == '-';
+    if (has_sign)
+        consume();
+
+    // Swallow the run of decimal digits (which may be empty).
+    for (; isdigit(m_current_char); consume()) {
+    }
+}
+
 bool Lexer::is_eof() const
 {
    return m_current_char == EOF;
@ -186,6 +196,11 @@ bool Lexer::is_block_comment_end() const
    return m_current_char == '*' && m_position < m_source.length() && m_source[m_position] == '/';
 }

+// Returns true when the current character begins a numeric literal: either a
+// decimal digit, or a '.' that is directly followed by a digit (e.g. ".5").
+bool Lexer::is_numeric_literal_start() const
+{
+    if (isdigit(m_current_char))
+        return true;
+
+    if (m_current_char != '.')
+        return false;
+
+    // m_position already points at the character after m_current_char, so it
+    // is the lookahead index; make sure it is still inside the source.
+    if (m_position >= m_source.length())
+        return false;
+
+    return isdigit(m_source[m_position]);
+}
+
 void Lexer::syntax_error(const char* msg)
 {
    m_has_errors = true;
@ -235,11 +250,60 @@ Token Lexer::next()
        } else {
            token_type = it->value;
        }
+    } else if (is_numeric_literal_start()) {
+        if (m_current_char == '0') {
+            consume();
+            if (m_current_char == '.') {
+                // decimal
+                consume();
+                while (isdigit(m_current_char)) {
+                    consume();
+                }
+                if (m_current_char == 'e' || m_current_char == 'E') {
+                    consume_exponent();
+                }
+            } else if (m_current_char == 'e' || m_current_char == 'E') {
+                consume_exponent();
+            } else if (m_current_char == 'o' || m_current_char == 'O') {
+                // octal
+                consume();
+                while (m_current_char >= '0' && m_current_char <= '7') {
+                    consume();
+                }
+            } else if (m_current_char == 'b' || m_current_char == 'B') {
+                // binary
+                consume();
+                while (m_current_char == '0' || m_current_char == '1') {
+                    consume();
+                }
+            } else if (m_current_char == 'x' || m_current_char == 'X') {
+                // hexadecimal
+                consume();
+                while (isxdigit(m_current_char)) {
+                    consume();
+                }
            } else if (isdigit(m_current_char)) {
+                // octal without 'O' prefix. Forbidden in 'strict mode'
+                // FIXME: We need to make sure this produces a syntax error when in strict mode
+                do {
                    consume();
-        while (m_current_char == '.' || isdigit(m_current_char)) {
+                } while (isdigit(m_current_char));
+            }
+        } else {
+            // 1...9 or period
+            while (isdigit(m_current_char)) {
                consume();
            }
+            if (m_current_char == '.') {
+                consume();
+                while (isdigit(m_current_char)) {
+                    consume();
+                }
+            }
+            if (m_current_char == 'e' || m_current_char == 'E') {
+                consume_exponent();
+            }
+        }
        token_type = TokenType::NumericLiteral;
    } else if (m_current_char == '"' || m_current_char == '\'') {
        char stop_char = m_current_char;
@ -330,5 +394,4 @@ Token Lexer::next()

    return m_current_token;
 }
-
 }
--- a/Libraries/LibJS/Lexer.h
+++ b/Libraries/LibJS/Lexer.h
@ -42,12 +42,14 @@ public:

 private:
    void consume();
+    void consume_exponent();
    bool is_eof() const;
    bool is_identifier_start() const;
    bool is_identifier_middle() const;
    bool is_line_comment_start() const;
    bool is_block_comment_start() const;
    bool is_block_comment_end() const;
+    bool is_numeric_literal_start() const;

    void syntax_error(const char*);

--- a/Libraries/LibJS/Tests/numeric-literals-basic.js
+++ b/Libraries/LibJS/Tests/numeric-literals-basic.js
@ -0,0 +1,20 @@
+try {
+    // Prefixed integer literals; the prefix letter is case-insensitive.
+    assert(0xff === 255);
+    assert(0XFF === 255);
+    assert(0o10 === 8);
+    assert(0O10 === 8);
+    assert(0b10 === 2);
+    assert(0B10 === 2);
+
+    // Legacy octal: a leading zero followed only by octal digits.
+    assert(010 === 8);
+
+    // Exponents, with either marker case and an optional sign.
+    assert(1e3 === 1000);
+    assert(1E3 === 1000);
+    assert(1e+3 === 1000);
+    assert(1e-3 === 0.001);
+    assert(0e1 === 0);
+
+    // Fractions with and without a leading zero, combined with exponents.
+    assert(.1 === 0.1);
+    assert(.1e1 === 1);
+    assert(0.1e1 === 1);
+    assert(.1e+1 === 1);
+    assert(0.1e+1 === 1);
+
+    console.log("PASS");
+} catch (e) {
+    console.log("FAIL: " + e);
+}
--- a/Libraries/LibJS/Token.cpp
+++ b/Libraries/LibJS/Token.cpp
@ -27,6 +27,7 @@
 #include "Token.h"
 #include <AK/Assertions.h>
 #include <AK/StringBuilder.h>
+#include <ctype.h>

 namespace JS {

@ -52,7 +53,23 @@ const char* Token::name() const
 double Token::double_value() const
 {
    ASSERT(type() == TokenType::NumericLiteral);
-    return strtod(String(m_value).characters(), nullptr);
+    // Converts the source text of a NumericLiteral token into its numeric value.
+    // Literals with a 0x / 0o / 0b prefix and legacy octal literals are integers
+    // in a non-decimal base; every other form is handed to strtod().
+    String value_string(m_value);
+
+    // Accumulates the digits from index `start` onwards, in the given base,
+    // directly into a double. Unlike strtoul() the result is not clamped to
+    // ULONG_MAX for long literals. Parsing stops at the first character that
+    // is not a valid digit in `base`, as strtoul() would.
+    auto parse_integer = [&value_string](size_t start, int base) {
+        double result = 0;
+        for (size_t i = start; i < value_string.length(); ++i) {
+            char ch = value_string[i];
+            if (!isxdigit(ch))
+                break;
+            int digit = isdigit(ch) ? ch - '0' : tolower(ch) - 'a' + 10;
+            if (digit >= base)
+                break;
+            result = result * base + digit;
+        }
+        return result;
+    };
+
+    // Check the length before indexing so a short value is never read out of range.
+    if (value_string.length() >= 2 && value_string[0] == '0') {
+        char prefix = value_string[1];
+        if (prefix == 'x' || prefix == 'X') {
+            // hexadecimal
+            return parse_integer(2, 16);
+        }
+        if (prefix == 'o' || prefix == 'O') {
+            // octal
+            return parse_integer(2, 8);
+        }
+        if (prefix == 'b' || prefix == 'B') {
+            // binary
+            return parse_integer(2, 2);
+        }
+        if (isdigit(prefix)) {
+            // Leading zero without a prefix: legacy octal, a syntax error in strict mode.
+            // If the literal contains an 8 or 9 (e.g. 089) it is a decimal number
+            // instead, so fall through to strtod() in that case.
+            bool has_non_octal_digit = false;
+            for (size_t i = 1; i < value_string.length(); ++i) {
+                if (value_string[i] == '8' || value_string[i] == '9') {
+                    has_non_octal_digit = true;
+                    break;
+                }
+            }
+            if (!has_non_octal_digit)
+                return parse_integer(1, 8);
+        }
+    }
+    return strtod(value_string.characters(), nullptr);
 }

 String Token::string_value() const