mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 11:38:11 +00:00
LibWeb: Use the term "ident sequence" instead of "name"
This is an editorial change in the December 2021 version of SYNTAX-3: https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/
This commit is contained in:
parent
7cab86ad28
commit
97e174afcd
2 changed files with 33 additions and 31 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, the SerenityOS developers.
|
||||
* Copyright (c) 2021, Sam Atkins <atkinssj@serenityos.org>
|
||||
* Copyright (c) 2021-2022, Sam Atkins <atkinssj@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -44,7 +44,8 @@ static inline bool is_low_line(u32 code_point)
|
|||
return code_point == 0x5F;
|
||||
}
|
||||
|
||||
static inline bool is_name_start_code_point(u32 code_point)
|
||||
// https://www.w3.org/TR/css-syntax-3/#ident-start-code-point
|
||||
static inline bool is_ident_start_code_point(u32 code_point)
|
||||
{
|
||||
// FIXME: We use !is_ascii() for "non-ASCII code point" in the spec, but it's not quite right -
|
||||
// it treats EOF as a valid! The spec also lacks a definition of code point. For now, the
|
||||
|
@ -57,9 +58,10 @@ static inline bool is_hyphen_minus(u32 code_point)
|
|||
return code_point == 0x2D;
|
||||
}
|
||||
|
||||
static inline bool is_name_code_point(u32 code_point)
|
||||
// https://www.w3.org/TR/css-syntax-3/#ident-code-point
|
||||
static inline bool is_ident_code_point(u32 code_point)
|
||||
{
|
||||
return is_name_start_code_point(code_point) || is_ascii_digit(code_point) || is_hyphen_minus(code_point);
|
||||
return is_ident_start_code_point(code_point) || is_ascii_digit(code_point) || is_hyphen_minus(code_point);
|
||||
}
|
||||
|
||||
static inline bool is_non_printable(u32 code_point)
|
||||
|
@ -424,8 +426,8 @@ Token Tokenizer::consume_an_ident_like_token()
|
|||
// This section describes how to consume an ident-like token from a stream of code points.
|
||||
// It returns an <ident-token>, <function-token>, <url-token>, or <bad-url-token>.
|
||||
|
||||
// Consume a name, and let string be the result.
|
||||
auto string = consume_a_name();
|
||||
// Consume an ident sequence, and let string be the result.
|
||||
auto string = consume_an_ident_sequence();
|
||||
|
||||
// If string’s value is an ASCII case-insensitive match for "url", and the next input code
|
||||
// point is U+0028 LEFT PARENTHESIS ((), consume it.
|
||||
|
@ -640,15 +642,15 @@ float Tokenizer::convert_a_string_to_a_number(StringView string)
|
|||
}
|
||||
|
||||
// https://www.w3.org/TR/css-syntax-3/#consume-name
|
||||
String Tokenizer::consume_a_name()
|
||||
String Tokenizer::consume_an_ident_sequence()
|
||||
{
|
||||
// This section describes how to consume a name from a stream of code points.
|
||||
// This section describes how to consume an ident sequence from a stream of code points.
|
||||
// It returns a string containing the largest name that can be formed from adjacent
|
||||
// code points in the stream, starting from the first.
|
||||
//
|
||||
// Note: This algorithm does not do the verification of the first few code points that
|
||||
// are necessary to ensure the returned code points would constitute an <ident-token>.
|
||||
// If that is the intended use, ensure that the stream starts with an identifier before
|
||||
// If that is the intended use, ensure that the stream starts with an ident sequence before
|
||||
// calling this algorithm.
|
||||
|
||||
// Let result initially be an empty string.
|
||||
|
@ -662,7 +664,7 @@ String Tokenizer::consume_a_name()
|
|||
break;
|
||||
|
||||
// name code point
|
||||
if (is_name_code_point(input)) {
|
||||
if (is_ident_code_point(input)) {
|
||||
// Append the code point to result.
|
||||
result.append_code_point(input);
|
||||
continue;
|
||||
|
@ -837,15 +839,15 @@ Token Tokenizer::consume_a_numeric_token()
|
|||
// Consume a number and let number be the result.
|
||||
auto number = consume_a_number();
|
||||
|
||||
// If the next 3 input code points would start an identifier, then:
|
||||
if (would_start_an_identifier(peek_triplet())) {
|
||||
// If the next 3 input code points would start an ident sequence, then:
|
||||
if (would_start_an_ident_sequence(peek_triplet())) {
|
||||
// 1. Create a <dimension-token> with the same value and type flag as number,
|
||||
// and a unit set initially to the empty string.
|
||||
auto token = create_new_token(Token::Type::Dimension);
|
||||
token.m_number_value = number;
|
||||
|
||||
// 2. Consume a name. Set the <dimension-token>’s unit to the returned value.
|
||||
auto unit = consume_a_name();
|
||||
// 2. Consume an ident sequence. Set the <dimension-token>’s unit to the returned value.
|
||||
auto unit = consume_an_ident_sequence();
|
||||
VERIFY(!unit.is_empty());
|
||||
// NOTE: We intentionally store this in the `value`, to save space.
|
||||
token.m_value = move(unit);
|
||||
|
@ -938,9 +940,9 @@ bool Tokenizer::is_valid_escape_sequence(U32Twin values)
|
|||
}
|
||||
|
||||
// https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier
|
||||
bool Tokenizer::would_start_an_identifier(U32Triplet values)
|
||||
bool Tokenizer::would_start_an_ident_sequence(U32Triplet values)
|
||||
{
|
||||
// This section describes how to check if three code points would start an identifier.
|
||||
// This section describes how to check if three code points would start an ident sequence.
|
||||
// The algorithm described here can be called explicitly with three code points, or
|
||||
// can be called with the input stream itself. In the latter case, the three code
|
||||
// points in question are the current input code point and the next two input code
|
||||
|
@ -954,14 +956,14 @@ bool Tokenizer::would_start_an_identifier(U32Triplet values)
|
|||
if (is_hyphen_minus(values.first)) {
|
||||
// If the second code point is a name-start code point or a U+002D HYPHEN-MINUS,
|
||||
// or the second and third code points are a valid escape, return true.
|
||||
if (is_name_start_code_point(values.second) || is_hyphen_minus(values.second) || is_valid_escape_sequence(values.to_twin_23()))
|
||||
if (is_ident_start_code_point(values.second) || is_hyphen_minus(values.second) || is_valid_escape_sequence(values.to_twin_23()))
|
||||
return true;
|
||||
// Otherwise, return false.
|
||||
return false;
|
||||
}
|
||||
|
||||
// name-start code point
|
||||
if (is_name_start_code_point(values.first)) {
|
||||
if (is_ident_start_code_point(values.first)) {
|
||||
// Return true.
|
||||
return true;
|
||||
}
|
||||
|
@ -1117,22 +1119,22 @@ Token Tokenizer::consume_a_token()
|
|||
if (is_number_sign(input)) {
|
||||
dbgln_if(CSS_TOKENIZER_DEBUG, "is number sign");
|
||||
|
||||
// If the next input code point is a name code point or the next two input code points
|
||||
// If the next input code point is an ident code point or the next two input code points
|
||||
// are a valid escape, then:
|
||||
auto next_input = peek_code_point();
|
||||
auto maybe_escape = peek_twin();
|
||||
|
||||
if (is_name_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) {
|
||||
if (is_ident_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) {
|
||||
// 1. Create a <hash-token>.
|
||||
auto token = create_new_token(Token::Type::Hash);
|
||||
|
||||
// 2. If the next 3 input code points would start an identifier, set the <hash-token>’s
|
||||
// 2. If the next 3 input code points would start an ident sequence, set the <hash-token>’s
|
||||
// type flag to "id".
|
||||
if (would_start_an_identifier(peek_triplet()))
|
||||
if (would_start_an_ident_sequence(peek_triplet()))
|
||||
token.m_hash_type = Token::HashType::Id;
|
||||
|
||||
// 3. Consume a name, and set the <hash-token>’s value to the returned string.
|
||||
auto name = consume_a_name();
|
||||
// 3. Consume an ident sequence, and set the <hash-token>’s value to the returned string.
|
||||
auto name = consume_an_ident_sequence();
|
||||
token.m_value = move(name);
|
||||
|
||||
// 4. Return the <hash-token>.
|
||||
|
@ -1207,7 +1209,7 @@ Token Tokenizer::consume_a_token()
|
|||
|
||||
// Otherwise, if the input stream starts with an identifier, reconsume the current
|
||||
// input code point, consume an ident-like token, and return it.
|
||||
if (would_start_an_identifier(start_of_input_stream_triplet())) {
|
||||
if (would_start_an_ident_sequence(start_of_input_stream_triplet())) {
|
||||
reconsume_current_input_code_point();
|
||||
return consume_an_ident_like_token();
|
||||
}
|
||||
|
@ -1265,10 +1267,10 @@ Token Tokenizer::consume_a_token()
|
|||
// U+0040 COMMERCIAL AT (@)
|
||||
if (is_at(input)) {
|
||||
dbgln_if(CSS_TOKENIZER_DEBUG, "is at");
|
||||
// If the next 3 input code points would start an identifier, consume a name, create
|
||||
// If the next 3 input code points would start an ident sequence, consume an ident sequence, create
|
||||
// an <at-keyword-token> with its value set to the returned value, and return it.
|
||||
if (would_start_an_identifier(peek_triplet())) {
|
||||
auto name = consume_a_name();
|
||||
if (would_start_an_ident_sequence(peek_triplet())) {
|
||||
auto name = consume_an_ident_sequence();
|
||||
return create_value_token(Token::Type::AtKeyword, name);
|
||||
}
|
||||
|
||||
|
@ -1329,7 +1331,7 @@ Token Tokenizer::consume_a_token()
|
|||
}
|
||||
|
||||
// name-start code point
|
||||
if (is_name_start_code_point(input)) {
|
||||
if (is_ident_start_code_point(input)) {
|
||||
dbgln_if(CSS_TOKENIZER_DEBUG, "is name start");
|
||||
// Reconsume the current input code point, consume an ident-like token, and return it.
|
||||
reconsume_current_input_code_point();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue