1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-24 19:47:42 +00:00

LibWeb: Make CSS 'An+B' parsing spec-compliant

Parsing this pattern from CSS tokens turns out to be slightly crazy, but
thankfully well documented in the spec.

The spec lists the cases in order of simple -> complicated, but this
would cause problems in code, since `<n-dimension> <signed-.integer>`
would never by reached, as `<n-dimension>` comes before. Instead, I
have grouped them by their first token.

Also renamed the NthChildPattern class to ANPlusBPattern, to match spec
terminology.
This commit is contained in:
Sam Atkins 2021-07-24 21:22:44 +01:00 committed by Andreas Kling
parent 8d0ff98eff
commit 6034fc0ee6
7 changed files with 333 additions and 92 deletions

View file

@ -1046,10 +1046,10 @@ public:
pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::LastOfType; pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::LastOfType;
} else if (pseudo_name.starts_with("nth-child", CaseSensitivity::CaseInsensitive)) { } else if (pseudo_name.starts_with("nth-child", CaseSensitivity::CaseInsensitive)) {
pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::NthChild; pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::NthChild;
pseudo_class.nth_child_pattern = CSS::Selector::SimpleSelector::NthChildPattern::parse(capture_selector_args(pseudo_name)); pseudo_class.nth_child_pattern = CSS::Selector::SimpleSelector::ANPlusBPattern::parse(capture_selector_args(pseudo_name));
} else if (pseudo_name.starts_with("nth-last-child", CaseSensitivity::CaseInsensitive)) { } else if (pseudo_name.starts_with("nth-last-child", CaseSensitivity::CaseInsensitive)) {
pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastChild; pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastChild;
pseudo_class.nth_child_pattern = CSS::Selector::SimpleSelector::NthChildPattern::parse(capture_selector_args(pseudo_name)); pseudo_class.nth_child_pattern = CSS::Selector::SimpleSelector::ANPlusBPattern::parse(capture_selector_args(pseudo_name));
} else if (pseudo_name.equals_ignoring_case("before")) { } else if (pseudo_name.equals_ignoring_case("before")) {
simple_selector.pseudo_element = CSS::Selector::SimpleSelector::PseudoElement::Before; simple_selector.pseudo_element = CSS::Selector::SimpleSelector::PseudoElement::Before;
} else if (pseudo_name.equals_ignoring_case("after")) { } else if (pseudo_name.equals_ignoring_case("after")) {

View file

@ -6,6 +6,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/CharacterTypes.h>
#include <AK/Debug.h> #include <AK/Debug.h>
#include <AK/NonnullRefPtrVector.h> #include <AK/NonnullRefPtrVector.h>
#include <AK/SourceLocation.h> #include <AK/SourceLocation.h>
@ -603,7 +604,6 @@ Result<Selector::SimpleSelector, Parser::SelectorParsingResult> Parser::parse_si
simple_selector.pseudo_class.nth_child_pattern = nth_child_pattern.value(); simple_selector.pseudo_class.nth_child_pattern = nth_child_pattern.value();
} else { } else {
dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid nth-child format"); dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid nth-child format");
function_values.dump_all_tokens();
return SelectorParsingResult::SyntaxError; return SelectorParsingResult::SyntaxError;
} }
} else if (pseudo_function.name().equals_ignoring_case("nth-last-child")) { } else if (pseudo_function.name().equals_ignoring_case("nth-last-child")) {
@ -611,11 +611,9 @@ Result<Selector::SimpleSelector, Parser::SelectorParsingResult> Parser::parse_si
auto function_values = TokenStream<StyleComponentValueRule>(pseudo_function.values()); auto function_values = TokenStream<StyleComponentValueRule>(pseudo_function.values());
auto nth_child_pattern = parse_a_n_plus_b_pattern(function_values); auto nth_child_pattern = parse_a_n_plus_b_pattern(function_values);
if (nth_child_pattern.has_value()) { if (nth_child_pattern.has_value()) {
dbgln("+++ Got An+B pattern: '{}'", nth_child_pattern.value().to_string());
simple_selector.pseudo_class.nth_child_pattern = nth_child_pattern.value(); simple_selector.pseudo_class.nth_child_pattern = nth_child_pattern.value();
} else { } else {
dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid nth-child format"); dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid nth-child format");
function_values.dump_all_tokens();
return SelectorParsingResult::SyntaxError; return SelectorParsingResult::SyntaxError;
} }
} else { } else {
@ -1491,7 +1489,7 @@ RefPtr<StyleValue> Parser::parse_numeric_value(ParsingContext const&, StyleCompo
if (component_value.is(Token::Type::Number)) { if (component_value.is(Token::Type::Number)) {
auto number = component_value.token(); auto number = component_value.token();
if (number.m_number_type == Token::NumberType::Integer) { if (number.m_number_type == Token::NumberType::Integer) {
return NumericStyleValue::create(number.integer()); return NumericStyleValue::create(number.to_integer());
} else { } else {
auto float_value = try_parse_float(number.m_value.string_view()); auto float_value = try_parse_float(number.m_value.string_view());
if (float_value.has_value()) if (float_value.has_value())
@ -1748,7 +1746,7 @@ RefPtr<StyleValue> Parser::parse_css_value(ParsingContext const& context, Proper
if (takes_integer_value(property_id) && component_value.is(Token::Type::Number)) { if (takes_integer_value(property_id) && component_value.is(Token::Type::Number)) {
auto number = component_value.token(); auto number = component_value.token();
if (number.m_number_type == Token::NumberType::Integer) { if (number.m_number_type == Token::NumberType::Integer) {
return LengthStyleValue::create(Length::make_px(number.integer())); return LengthStyleValue::create(Length::make_px(number.to_integer()));
} }
} }
@ -1776,90 +1774,306 @@ RefPtr<StyleValue> Parser::parse_css_value(ParsingContext const& context, Proper
return {}; return {};
} }
Optional<Selector::SimpleSelector::NthChildPattern> Parser::parse_nth_child_pattern(TokenStream<StyleComponentValueRule>& values) Optional<Selector::SimpleSelector::ANPlusBPattern> Parser::parse_a_n_plus_b_pattern(TokenStream<StyleComponentValueRule>& values)
{ {
dbgln_if(CSS_PARSER_DEBUG, "Parser::parse_nth_child_pattern"); dbgln_if(CSS_PARSER_DEBUG, "Parser::parse_a_n_plus_b_pattern");
Selector::SimpleSelector::NthChildPattern pattern; int a = 0;
int b = 0;
auto current_value = values.next_token(); auto syntax_error = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> {
if (current_value.is(Token::Type::Ident)) { if constexpr (CSS_PARSER_DEBUG) {
auto ident = current_value.token().ident(); dbgln("Invalid An+B value:");
if (ident.equals_ignoring_case("odd")) { values.dump_all_tokens();
pattern.step_size = 2;
pattern.offset = 1;
return pattern;
} else if (ident.equals_ignoring_case("even")) {
pattern.step_size = 2;
return pattern;
} }
} return {};
// Try to match any of following patterns:
// 1. An+B
// 2. An
// 3. B
// ...where "A" is "step_size", "B" is "offset" and rest are literals.
// "A" can be omitted, in that case "A" = 1.
// "A" may have "+" or "-" sign, "B" always must be predated by sign for pattern (1).
auto is_n = [](StyleComponentValueRule value) -> bool {
return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("n");
}; };
auto is_delim = [](StyleComponentValueRule value, StringView delim) -> bool { auto make_return_value = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> {
// When we think we are done, but there are more non-whitespace tokens, then it's a parse error.
values.skip_whitespace();
if (values.has_next_token()) {
if constexpr (CSS_PARSER_DEBUG) {
dbgln("Extra tokens at end of An+B value:");
values.dump_all_tokens();
}
return syntax_error();
} else {
return Selector::SimpleSelector::ANPlusBPattern { a, b };
}
};
auto is_n = [](StyleComponentValueRule const& value) -> bool {
return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("n"sv);
};
auto is_ndash = [](StyleComponentValueRule const& value) -> bool {
return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("n-"sv);
};
auto is_dashn = [](StyleComponentValueRule const& value) -> bool {
return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("-n"sv);
};
auto is_dashndash = [](StyleComponentValueRule const& value) -> bool {
return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("-n-"sv);
};
auto is_delim = [](StyleComponentValueRule const& value, StringView const& delim) -> bool {
return value.is(Token::Type::Delim) && value.token().delim().equals_ignoring_case(delim); return value.is(Token::Type::Delim) && value.token().delim().equals_ignoring_case(delim);
}; };
auto is_n_dimension = [](StyleComponentValueRule const& value) -> bool {
int step_size_or_offset = 0; if (!value.is(Token::Type::Dimension))
return false;
// "When a=1, or a=-1, the 1 may be omitted from the rule." if (value.token().number_type() != Token::NumberType::Integer)
if (is_n(current_value)) { return false;
step_size_or_offset = +1; if (!value.token().dimension_unit().equals_ignoring_case("n"sv))
} else if (is_delim(current_value, "+"sv) && is_n(values.peek_token())) { return false;
step_size_or_offset = +1; return true;
values.next_token(); };
} else if (is_delim(current_value, "-"sv) && is_n(values.peek_token())) { auto is_ndash_dimension = [](StyleComponentValueRule const& value) -> bool {
step_size_or_offset = -1; if (!value.is(Token::Type::Dimension))
values.next_token(); return false;
} else if (current_value.is(Token::Type::Number)) { if (value.token().number_type() != Token::NumberType::Integer)
step_size_or_offset = current_value.token().integer(); return false;
} else { if (!value.token().dimension_unit().equals_ignoring_case("n-"sv))
values.reconsume_current_input_token(); return false;
} return true;
};
current_value = values.next_token(); auto is_ndashdigit_dimension = [](StyleComponentValueRule const& value) -> bool {
if (!value.is(Token::Type::Dimension))
if (is_n(current_value)) { return false;
values.skip_whitespace(); if (value.token().number_type() != Token::NumberType::Integer)
return false;
auto next_value = values.peek_token(); auto dimension_unit = value.token().dimension_unit();
if (is_delim(next_value, "+") || is_delim(next_value, "-")) { if (!dimension_unit.starts_with("n-"sv, CaseSensitivity::CaseInsensitive))
const auto sign = is_delim(next_value, "+") ? 1 : -1; return false;
values.next_token(); for (size_t i = 2; i < dimension_unit.length(); ++i) {
if (!is_ascii_digit(dimension_unit[i]))
values.skip_whitespace(); return false;
// "An+B" pattern
auto number = values.next_token();
if (!number.is(Token::Type::Number))
return {};
pattern.step_size = step_size_or_offset;
pattern.offset = sign * number.token().integer();
} else {
// "An" pattern
pattern.step_size = step_size_or_offset;
} }
} else { return true;
// "B" pattern };
pattern.offset = step_size_or_offset; auto is_ndashdigit_ident = [](StyleComponentValueRule const& value) -> bool {
if (!value.is(Token::Type::Ident))
return false;
auto ident = value.token().ident();
if (!ident.starts_with("n-"sv, CaseSensitivity::CaseInsensitive))
return false;
for (size_t i = 2; i < ident.length(); ++i) {
if (!is_ascii_digit(ident[i]))
return false;
}
return true;
};
auto is_dashndashdigit_ident = [](StyleComponentValueRule const& value) -> bool {
if (!value.is(Token::Type::Ident))
return false;
auto ident = value.token().ident();
if (!ident.starts_with("-n-"sv, CaseSensitivity::CaseInsensitive))
return false;
for (size_t i = 3; i < ident.length(); ++i) {
if (!is_ascii_digit(ident[i]))
return false;
}
return true;
};
auto is_integer = [](StyleComponentValueRule const& value) -> bool {
return value.is(Token::Type::Number) && value.token().is(Token::NumberType::Integer);
};
auto is_signed_integer = [is_integer](StyleComponentValueRule const& value) -> bool {
return is_integer(value) && value.token().is_integer_value_signed();
};
auto is_signless_integer = [is_integer](StyleComponentValueRule const& value) -> bool {
return is_integer(value) && !value.token().is_integer_value_signed();
};
// https://www.w3.org/TR/css-syntax-3/#the-anb-type
// Unfortunately these can't be in the same order as in the spec.
values.skip_whitespace();
auto& first_value = values.next_token();
// odd | even
if (first_value.is(Token::Type::Ident)) {
auto ident = first_value.token().ident();
if (ident.equals_ignoring_case("odd")) {
a = 2;
b = 1;
return make_return_value();
} else if (ident.equals_ignoring_case("even")) {
a = 2;
return make_return_value();
}
}
// <integer>
if (is_integer(first_value)) {
b = first_value.token().to_integer();
return make_return_value();
}
// <n-dimension>
// <n-dimension> <signed-integer>
// <n-dimension> ['+' | '-'] <signless-integer>
if (is_n_dimension(first_value)) {
a = first_value.token().dimension_value_int();
values.skip_whitespace();
auto& second_value = values.next_token();
if (second_value.is(Token::Type::EndOfFile)) {
// <n-dimension>
return make_return_value();
} else if (is_signed_integer(second_value)) {
// <n-dimension> <signed-integer>
b = second_value.token().to_integer();
return make_return_value();
}
values.skip_whitespace();
auto& third_value = values.next_token();
if ((is_delim(second_value, "+"sv) || is_delim(second_value, "-"sv)) && is_signless_integer(third_value)) {
// <n-dimension> ['+' | '-'] <signless-integer>
b = third_value.token().to_integer() * (is_delim(second_value, "+"sv) ? 1 : -1);
return make_return_value();
}
return syntax_error();
}
// <ndash-dimension> <signless-integer>
if (is_ndash_dimension(first_value)) {
values.skip_whitespace();
auto& second_value = values.next_token();
if (is_signless_integer(second_value)) {
a = first_value.token().dimension_value_int();
b = -second_value.token().to_integer();
return make_return_value();
}
return syntax_error();
}
// <ndashdigit-dimension>
if (is_ndashdigit_dimension(first_value)) {
auto& dimension = first_value.token();
a = dimension.dimension_value_int();
auto maybe_b = dimension.dimension_unit().substring_view(1).to_int();
if (maybe_b.has_value()) {
b = maybe_b.value();
return make_return_value();
}
return syntax_error();
}
// <dashndashdigit-ident>
if (is_dashndashdigit_ident(first_value)) {
a = -1;
auto maybe_b = first_value.token().ident().substring_view(2).to_int();
if (maybe_b.has_value()) {
b = maybe_b.value();
return make_return_value();
}
return syntax_error();
}
// -n
// -n <signed-integer>
// -n ['+' | '-'] <signless-integer>
if (is_dashn(first_value)) {
a = -1;
values.skip_whitespace();
auto& second_value = values.next_token();
if (second_value.is(Token::Type::EndOfFile)) {
// -n
return make_return_value();
} else if (is_signed_integer(second_value)) {
// -n <signed-integer>
b = second_value.token().to_integer();
return make_return_value();
}
values.skip_whitespace();
auto& third_value = values.next_token();
if ((is_delim(second_value, "+"sv) || is_delim(second_value, "-"sv)) && is_signless_integer(third_value)) {
// -n ['+' | '-'] <signless-integer>
b = third_value.token().to_integer() * (is_delim(second_value, "+"sv) ? 1 : -1);
return make_return_value();
}
return syntax_error();
}
// -n- <signless-integer>
if (is_dashndash(first_value)) {
values.skip_whitespace();
auto& second_value = values.next_token();
if (is_signless_integer(second_value)) {
a = -1;
b = -second_value.token().to_integer();
return make_return_value();
}
return syntax_error();
} }
if (values.has_next_token()) // All that's left now are these:
return {}; // '+'?† n
// '+'?† n <signed-integer>
// '+'?† n ['+' | '-'] <signless-integer>
// '+'?† n- <signless-integer>
// '+'?† <ndashdigit-ident>
// In all of these cases, the + is optional, and has no effect.
// So, we just skip the +, and carry on.
if (!is_delim(first_value, "+"sv)) {
values.reconsume_current_input_token();
// We do *not* skip whitespace here.
}
return pattern; auto& first_after_plus = values.next_token();
// '+'?† n
// '+'?† n <signed-integer>
// '+'?† n ['+' | '-'] <signless-integer>
if (is_n(first_after_plus)) {
a = 1;
values.skip_whitespace();
auto& second_value = values.next_token();
if (second_value.is(Token::Type::EndOfFile)) {
// '+'?† n
return make_return_value();
} else if (is_signed_integer(second_value)) {
// '+'?† n <signed-integer>
b = second_value.token().to_integer();
return make_return_value();
}
values.skip_whitespace();
auto& third_value = values.next_token();
if ((is_delim(second_value, "+"sv) || is_delim(second_value, "-"sv)) && is_signless_integer(third_value)) {
// '+'?† n ['+' | '-'] <signless-integer>
b = third_value.token().to_integer() * (is_delim(second_value, "+"sv) ? 1 : -1);
return make_return_value();
}
return syntax_error();
}
// '+'?† n- <signless-integer>
if (is_ndash(first_after_plus)) {
values.skip_whitespace();
auto& second_value = values.next_token();
if (is_signless_integer(second_value)) {
a = 1;
b = -second_value.token().to_integer();
return make_return_value();
}
return syntax_error();
}
// '+'?† <ndashdigit-ident>
if (is_ndashdigit_ident(first_after_plus)) {
a = 1;
auto maybe_b = first_after_plus.token().ident().substring_view(1).to_int();
if (maybe_b.has_value()) {
b = maybe_b.value();
return make_return_value();
}
return syntax_error();
}
return syntax_error();
} }
} }

View file

@ -112,7 +112,7 @@ public:
template<typename T> template<typename T>
Vector<Vector<StyleComponentValueRule>> parse_as_comma_separated_list_of_component_values(TokenStream<T>&); Vector<Vector<StyleComponentValueRule>> parse_as_comma_separated_list_of_component_values(TokenStream<T>&);
Optional<Selector::SimpleSelector::NthChildPattern> parse_nth_child_pattern(TokenStream<StyleComponentValueRule>&); Optional<Selector::SimpleSelector::ANPlusBPattern> parse_a_n_plus_b_pattern(TokenStream<StyleComponentValueRule>&);
// FIXME: https://www.w3.org/TR/selectors-4/ // FIXME: https://www.w3.org/TR/selectors-4/
// Contrary to the name, these parse a comma-separated list of selectors, according to the spec. // Contrary to the name, these parse a comma-separated list of selectors, according to the spec.

View file

@ -54,7 +54,7 @@ String Token::to_debug_string() const
case Type::Number: case Type::Number:
builder.append("Number: "); builder.append("Number: ");
builder.append(m_value.to_string()); builder.append(m_value.to_string());
builder.append(m_unit.to_string()); builder.append(m_number_type == NumberType::Integer ? " (int)" : " (float)");
return builder.to_string(); return builder.to_string();
case Type::Percentage: case Type::Percentage:
builder.append("Percentage: "); builder.append("Percentage: ");

View file

@ -100,11 +100,34 @@ public:
} }
bool is(NumberType number_type) const { return is(Token::Type::Number) && m_number_type == number_type; } bool is(NumberType number_type) const { return is(Token::Type::Number) && m_number_type == number_type; }
StringView number_string_value() const
int integer() const {
VERIFY(m_type == Type::Number);
return m_value.string_view();
}
int to_integer() const
{ {
VERIFY(m_type == Type::Number && m_number_type == NumberType::Integer); VERIFY(m_type == Type::Number && m_number_type == NumberType::Integer);
return m_value.string_view().to_int().value(); return number_string_value().to_int().value();
}
bool is_integer_value_signed() const { return number_string_value().starts_with('-') || number_string_value().starts_with('+'); }
StringView dimension_unit() const
{
VERIFY(m_type == Type::Dimension);
return m_unit.string_view();
}
StringView dimension_value() const
{
VERIFY(m_type == Type::Dimension);
return m_value.string_view();
}
int dimension_value_int() const { return dimension_value().to_int().value(); }
NumberType number_type() const
{
VERIFY((m_type == Type::Number) || (m_type == Type::Dimension));
return m_number_type;
} }
Type mirror_variant() const; Type mirror_variant() const;

View file

@ -47,11 +47,11 @@ u32 Selector::specificity() const
return ids * 0x10000 + classes * 0x100 + tag_names; return ids * 0x10000 + classes * 0x100 + tag_names;
} }
Selector::SimpleSelector::NthChildPattern Selector::SimpleSelector::NthChildPattern::parse(StringView const& args) Selector::SimpleSelector::ANPlusBPattern Selector::SimpleSelector::ANPlusBPattern::parse(StringView const& args)
{ {
// FIXME: Remove this when the DeprecatedCSSParser is gone. // FIXME: Remove this when the DeprecatedCSSParser is gone.
// The new Parser::parse_nth_child_pattern() does the same as this, using Tokens. // The new Parser::parse_nth_child_pattern() does the same as this, using Tokens.
CSS::Selector::SimpleSelector::NthChildPattern pattern; CSS::Selector::SimpleSelector::ANPlusBPattern pattern;
if (args.equals_ignoring_case("odd")) { if (args.equals_ignoring_case("odd")) {
pattern.step_size = 2; pattern.step_size = 2;
pattern.offset = 1; pattern.offset = 1;

View file

@ -33,11 +33,15 @@ public:
}; };
Type type { Type::Invalid }; Type type { Type::Invalid };
struct NthChildPattern { struct ANPlusBPattern {
int step_size { 0 }; int step_size { 0 }; // "A"
int offset = { 0 }; int offset = { 0 }; // "B"
static NthChildPattern parse(StringView const& args); static ANPlusBPattern parse(StringView const& args);
String to_string() const
{
return String::formatted("{}n{:+}", step_size, offset);
}
}; };
struct PseudoClass { struct PseudoClass {
@ -66,7 +70,7 @@ public:
// FIXME: We don't need this field on every single SimpleSelector, but it's also annoying to malloc it somewhere. // FIXME: We don't need this field on every single SimpleSelector, but it's also annoying to malloc it somewhere.
// Only used when "pseudo_class" is "NthChild" or "NthLastChild". // Only used when "pseudo_class" is "NthChild" or "NthLastChild".
NthChildPattern nth_child_pattern; ANPlusBPattern nth_child_pattern;
SelectorList not_selector {}; SelectorList not_selector {};
}; };