mirror of
https://github.com/RGBCube/serenity
synced 2025-07-24 21:37:34 +00:00
LibWeb: Convert numeric tokens to numbers in CSS Tokenizer
The spec wants us to produce numeric values as the Tokenizer sees them, rather than waiting until the parse stage. This is a first step towards that.
This commit is contained in:
parent
d2ef8b29e8
commit
f6869797a7
3 changed files with 90 additions and 5 deletions
|
@ -152,6 +152,7 @@ private:
|
||||||
StringBuilder m_unit;
|
StringBuilder m_unit;
|
||||||
HashType m_hash_type { HashType::Unrestricted };
|
HashType m_hash_type { HashType::Unrestricted };
|
||||||
NumberType m_number_type { NumberType::Integer };
|
NumberType m_number_type { NumberType::Integer };
|
||||||
|
double m_number_value { 0 };
|
||||||
|
|
||||||
Position m_start_position;
|
Position m_start_position;
|
||||||
Position m_end_position;
|
Position m_end_position;
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include <AK/Vector.h>
|
#include <AK/Vector.h>
|
||||||
#include <LibTextCodec/Decoder.h>
|
#include <LibTextCodec/Decoder.h>
|
||||||
#include <LibWeb/CSS/Parser/Tokenizer.h>
|
#include <LibWeb/CSS/Parser/Tokenizer.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
// U+FFFD REPLACEMENT CHARACTER (<28>)
|
// U+FFFD REPLACEMENT CHARACTER (<28>)
|
||||||
#define REPLACEMENT_CHARACTER 0xFFFD
|
#define REPLACEMENT_CHARACTER 0xFFFD
|
||||||
|
@ -469,7 +470,84 @@ CSSNumber Tokenizer::consume_a_number()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return { repr.to_string(), type };
|
return { repr.to_string(), convert_a_string_to_a_number(repr.string_view()), type };
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://www.w3.org/TR/css-syntax-3/#convert-string-to-number
|
||||||
|
double Tokenizer::convert_a_string_to_a_number(StringView string)
|
||||||
|
{
|
||||||
|
auto code_point_at = [&](size_t index) -> u32 {
|
||||||
|
if (index < string.length())
|
||||||
|
return string[index];
|
||||||
|
return TOKENIZER_EOF;
|
||||||
|
};
|
||||||
|
|
||||||
|
// This algorithm does not do any verification to ensure that the string contains only a number.
|
||||||
|
// Ensure that the string contains only a valid CSS number before calling this algorithm.
|
||||||
|
|
||||||
|
// Divide the string into seven components, in order from left to right:
|
||||||
|
size_t position = 0;
|
||||||
|
|
||||||
|
// 1. A sign: a single U+002B PLUS SIGN (+) or U+002D HYPHEN-MINUS (-), or the empty string.
|
||||||
|
// Let s [sign] be the number -1 if the sign is U+002D HYPHEN-MINUS (-); otherwise, let s be the number 1.
|
||||||
|
int sign = 1;
|
||||||
|
if (is_plus_sign(code_point_at(position)) || is_hyphen_minus(code_point_at(position))) {
|
||||||
|
sign = is_hyphen_minus(code_point_at(position)) ? -1 : 1;
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. An integer part: zero or more digits.
|
||||||
|
// If there is at least one digit, let i [integer_part] be the number formed by interpreting the digits
|
||||||
|
// as a base-10 integer; otherwise, let i be the number 0.
|
||||||
|
double integer_part = 0;
|
||||||
|
while (is_ascii_digit(code_point_at(position))) {
|
||||||
|
integer_part = (integer_part * 10) + (code_point_at(position) - '0');
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. A decimal point: a single U+002E FULL STOP (.), or the empty string.
|
||||||
|
if (is_full_stop(code_point_at(position)))
|
||||||
|
position++;
|
||||||
|
|
||||||
|
// 4. A fractional part: zero or more digits.
|
||||||
|
// If there is at least one digit, let f [fractional_part] be the number formed by interpreting the digits
|
||||||
|
// as a base-10 integer and d [fractional_digits] be the number of digits; otherwise, let f and d be the number 0.
|
||||||
|
double fractional_part = 0;
|
||||||
|
int fractional_digits = 0;
|
||||||
|
while (is_ascii_digit(code_point_at(position))) {
|
||||||
|
fractional_part = (fractional_part * 10) + (code_point_at(position) - '0');
|
||||||
|
position++;
|
||||||
|
fractional_digits++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. An exponent indicator: a single U+0045 LATIN CAPITAL LETTER E (E) or U+0065 LATIN SMALL LETTER E (e),
|
||||||
|
// or the empty string.
|
||||||
|
if (is_e(code_point_at(position)) || is_E(code_point_at(position)))
|
||||||
|
position++;
|
||||||
|
|
||||||
|
// 6. An exponent sign: a single U+002B PLUS SIGN (+) or U+002D HYPHEN-MINUS (-), or the empty string.
|
||||||
|
// Let t [exponent_sign] be the number -1 if the sign is U+002D HYPHEN-MINUS (-); otherwise, let t be the number 1.
|
||||||
|
int exponent_sign = 1;
|
||||||
|
if (is_plus_sign(code_point_at(position)) || is_hyphen_minus(code_point_at(position))) {
|
||||||
|
exponent_sign = is_hyphen_minus(code_point_at(position)) ? -1 : 1;
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 7. An exponent: zero or more digits.
|
||||||
|
// If there is at least one digit, let e [exponent] be the number formed by interpreting the digits as a
|
||||||
|
// base-10 integer; otherwise, let e be the number 0.
|
||||||
|
double exponent = 0;
|
||||||
|
while (is_ascii_digit(code_point_at(position))) {
|
||||||
|
exponent = (exponent * 10) + (code_point_at(position) - '0');
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: We checked before calling this function that the string is a valid number,
|
||||||
|
// so if there is anything at the end, something has gone wrong!
|
||||||
|
VERIFY(position == string.length());
|
||||||
|
|
||||||
|
// Return the number s·(i + f·10^-d)·10^te.
|
||||||
|
return sign * (integer_part + fractional_part * pow(10, -fractional_digits)) * pow(10, exponent_sign * exponent);
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://www.w3.org/TR/css-syntax-3/#consume-name
|
// https://www.w3.org/TR/css-syntax-3/#consume-name
|
||||||
|
@ -601,8 +679,9 @@ Token Tokenizer::consume_a_numeric_token()
|
||||||
auto number = consume_a_number();
|
auto number = consume_a_number();
|
||||||
if (would_start_an_identifier()) {
|
if (would_start_an_identifier()) {
|
||||||
auto token = create_new_token(Token::Type::Dimension);
|
auto token = create_new_token(Token::Type::Dimension);
|
||||||
token.m_value.append(number.value);
|
token.m_value.append(number.string);
|
||||||
token.m_number_type = number.type;
|
token.m_number_type = number.type;
|
||||||
|
token.m_number_value = number.value;
|
||||||
|
|
||||||
auto unit = consume_a_name();
|
auto unit = consume_a_name();
|
||||||
VERIFY(!unit.is_empty() && !unit.is_whitespace());
|
VERIFY(!unit.is_empty() && !unit.is_whitespace());
|
||||||
|
@ -615,13 +694,16 @@ Token Tokenizer::consume_a_numeric_token()
|
||||||
(void)next_code_point();
|
(void)next_code_point();
|
||||||
|
|
||||||
auto token = create_new_token(Token::Type::Percentage);
|
auto token = create_new_token(Token::Type::Percentage);
|
||||||
token.m_value.append(number.value);
|
token.m_value.append(number.string);
|
||||||
|
token.m_number_type = number.type;
|
||||||
|
token.m_number_value = number.value;
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto token = create_new_token(Token::Type::Number);
|
auto token = create_new_token(Token::Type::Number);
|
||||||
token.m_value.append(number.value);
|
token.m_value.append(number.string);
|
||||||
token.m_number_type = number.type;
|
token.m_number_type = number.type;
|
||||||
|
token.m_number_value = number.value;
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,7 +59,8 @@ public:
|
||||||
|
|
||||||
// Result of Tokenizer::consume_a_number(): the consumed text, its numeric value and its type flag.
class CSSNumber {
|
class CSSNumber {
|
||||||
public:
|
public:
|
||||||
String value;
|
String string; // The number's representation as consumed (filled from `repr.to_string()`).
|
||||||
|
double value { 0 }; // Numeric value produced by convert_a_string_to_a_number() from the same representation.
|
||||||
Token::NumberType type {};
|
Token::NumberType type {}; // Copied into the resulting token's m_number_type.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -86,6 +87,7 @@ private:
|
||||||
[[nodiscard]] Token consume_a_numeric_token();
|
[[nodiscard]] Token consume_a_numeric_token();
|
||||||
[[nodiscard]] Token consume_an_ident_like_token();
|
[[nodiscard]] Token consume_an_ident_like_token();
|
||||||
[[nodiscard]] CSSNumber consume_a_number();
|
[[nodiscard]] CSSNumber consume_a_number();
|
||||||
|
[[nodiscard]] double convert_a_string_to_a_number(StringView);
|
||||||
[[nodiscard]] String consume_a_name();
|
[[nodiscard]] String consume_a_name();
|
||||||
[[nodiscard]] u32 consume_escaped_code_point();
|
[[nodiscard]] u32 consume_escaped_code_point();
|
||||||
[[nodiscard]] Token consume_a_url_token();
|
[[nodiscard]] Token consume_a_url_token();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue