diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt index 30bd9ad0bb..1455e16e65 100644 --- a/Userland/Libraries/LibWeb/CMakeLists.txt +++ b/Userland/Libraries/LibWeb/CMakeLists.txt @@ -58,6 +58,7 @@ set(SOURCES CSS/Parser/Parser.cpp CSS/Parser/ParsingContext.cpp CSS/Parser/Rule.cpp + CSS/Parser/SelectorParsing.cpp CSS/Parser/Token.cpp CSS/Parser/Tokenizer.cpp CSS/PercentageOr.cpp diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp index f965ccb2f8..2695e874b7 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -166,635 +165,6 @@ CSSStyleSheet* Parser::parse_as_css_stylesheet(Optional location) return CSSStyleSheet::create(m_context.realm(), rule_list, media_list, move(location)); } -Optional Parser::parse_as_selector(SelectorParsingMode parsing_mode) -{ - auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, parsing_mode); - if (!selector_list.is_error()) - return selector_list.release_value(); - - return {}; -} - -Optional Parser::parse_as_relative_selector(SelectorParsingMode parsing_mode) -{ - auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Relative, parsing_mode); - if (!selector_list.is_error()) - return selector_list.release_value(); - - return {}; -} - -template -Parser::ParseErrorOr Parser::parse_a_selector_list(TokenStream& tokens, SelectorType mode, SelectorParsingMode parsing_mode) -{ - auto comma_separated_lists = parse_a_comma_separated_list_of_component_values(tokens); - - Vector> selectors; - for (auto& selector_parts : comma_separated_lists) { - auto stream = TokenStream(selector_parts); - auto selector = parse_complex_selector(stream, mode); - if (selector.is_error()) { - if (parsing_mode == SelectorParsingMode::Forgiving) - continue; - return selector.error(); - } - selectors.append(selector.release_value()); - } - - if (selectors.is_empty() && parsing_mode != SelectorParsingMode::Forgiving) - return ParseError::SyntaxError; - - return selectors; -} - -Parser::ParseErrorOr> Parser::parse_complex_selector(TokenStream& tokens, SelectorType mode) -{ - Vector compound_selectors; - - auto first_selector = TRY(parse_compound_selector(tokens)); - if (!first_selector.has_value()) - return ParseError::SyntaxError; - - if (mode == SelectorType::Standalone) { - if (first_selector->combinator != Selector::Combinator::Descendant) - return ParseError::SyntaxError; - first_selector->combinator = Selector::Combinator::None; - } - compound_selectors.append(first_selector.release_value()); - - while (tokens.has_next_token()) { - auto compound_selector = TRY(parse_compound_selector(tokens)); - if (!compound_selector.has_value()) - break; - compound_selectors.append(compound_selector.release_value()); - } - - if (compound_selectors.is_empty()) - return ParseError::SyntaxError; - - return Selector::create(move(compound_selectors)); -} - -Parser::ParseErrorOr> Parser::parse_compound_selector(TokenStream& tokens) -{ - tokens.skip_whitespace(); - - auto combinator = parse_selector_combinator(tokens).value_or(Selector::Combinator::Descendant); - - tokens.skip_whitespace(); - - Vector simple_selectors; - - while (tokens.has_next_token()) { - auto component = TRY(parse_simple_selector(tokens)); - if (!component.has_value()) - break; - simple_selectors.append(component.release_value()); - } - - if (simple_selectors.is_empty()) - return Optional {}; - - return Selector::CompoundSelector { combinator, move(simple_selectors) }; -} - -Optional Parser::parse_selector_combinator(TokenStream& tokens) -{ - auto const& current_value = tokens.next_token(); - if (current_value.is(Token::Type::Delim)) { - switch (current_value.token().delim()) { - case '>': - return Selector::Combinator::ImmediateChild; - case '+': - return Selector::Combinator::NextSibling; - case '~': - return Selector::Combinator::SubsequentSibling; - case '|': { - auto const& next = tokens.peek_token(); - if (next.is(Token::Type::EndOfFile)) - return {}; - - if (next.is_delim('|')) { - tokens.next_token(); - return Selector::Combinator::Column; - } - } - } - } - - tokens.reconsume_current_input_token(); - return {}; -} - -Optional Parser::parse_selector_qualified_name(TokenStream& tokens, AllowWildcardName allow_wildcard_name) -{ - auto is_name = [](ComponentValue const& token) { - return token.is_delim('*') || token.is(Token::Type::Ident); - }; - auto get_name = [](ComponentValue const& token) { - if (token.is_delim('*')) - return FlyString::from_utf8("*"sv); - return FlyString::from_utf8(token.token().ident()); - }; - - // There are 3 possibilities here: - // (Where and are either an or a `*` delim) - // 1) `|` - // 2) `|` - // 3) `` - // Whitespace is forbidden between any of these parts. https://www.w3.org/TR/selectors-4/#white-space - - auto transaction = tokens.begin_transaction(); - - auto first_token = tokens.next_token(); - if (first_token.is_delim('|')) { - // Case 1: `|` - if (is_name(tokens.peek_token())) { - auto name_token = tokens.next_token(); - - if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*')) - return {}; - - transaction.commit(); - return Selector::SimpleSelector::QualifiedName { - .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::None, - .name = get_name(name_token).release_value_but_fixme_should_propagate_errors(), - }; - } - return {}; - } - - if (!is_name(first_token)) - return {}; - - if (tokens.peek_token().is_delim('|') && is_name(tokens.peek_token(1))) { - // Case 2: `|` - (void)tokens.next_token(); // `|` - auto namespace_ = get_name(first_token).release_value_but_fixme_should_propagate_errors(); - auto name = get_name(tokens.next_token()).release_value_but_fixme_should_propagate_errors(); - - if (allow_wildcard_name == AllowWildcardName::No && name == "*"sv) - return {}; - - auto namespace_type = namespace_ == "*"sv - ? Selector::SimpleSelector::QualifiedName::NamespaceType::Any - : Selector::SimpleSelector::QualifiedName::NamespaceType::Named; - - transaction.commit(); - return Selector::SimpleSelector::QualifiedName { - .namespace_type = namespace_type, - .namespace_ = namespace_, - .name = name, - }; - } - - // Case 3: `` - auto& name_token = first_token; - if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*')) - return {}; - - transaction.commit(); - return Selector::SimpleSelector::QualifiedName { - .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::Default, - .name = get_name(name_token).release_value_but_fixme_should_propagate_errors(), - }; -} - -Parser::ParseErrorOr Parser::parse_attribute_simple_selector(ComponentValue const& first_value) -{ - auto attribute_tokens = TokenStream { first_value.block().values() }; - - attribute_tokens.skip_whitespace(); - - if (!attribute_tokens.has_next_token()) { - dbgln_if(CSS_PARSER_DEBUG, "CSS attribute selector is empty!"); - return ParseError::SyntaxError; - } - - auto maybe_qualified_name = parse_selector_qualified_name(attribute_tokens, AllowWildcardName::No); - if (!maybe_qualified_name.has_value()) { - dbgln_if(CSS_PARSER_DEBUG, "Expected qualified-name for attribute name, got: '{}'", attribute_tokens.peek_token().to_debug_string()); - return ParseError::SyntaxError; - } - - Selector::SimpleSelector simple_selector { - .type = Selector::SimpleSelector::Type::Attribute, - .value = Selector::SimpleSelector::Attribute { - .match_type = Selector::SimpleSelector::Attribute::MatchType::HasAttribute, - // FIXME: Case-sensitivity is defined by the document language. - // HTML is insensitive with attribute names, and our code generally assumes - // they are converted to lowercase, so we do that here too. If we want to be - // correct with XML later, we'll need to keep the original case and then do - // a case-insensitive compare later. - .qualified_name = maybe_qualified_name.release_value(), - .case_type = Selector::SimpleSelector::Attribute::CaseType::DefaultMatch, - } - }; - - attribute_tokens.skip_whitespace(); - if (!attribute_tokens.has_next_token()) - return simple_selector; - - auto const& delim_part = attribute_tokens.next_token(); - if (!delim_part.is(Token::Type::Delim)) { - dbgln_if(CSS_PARSER_DEBUG, "Expected a delim for attribute comparison, got: '{}'", delim_part.to_debug_string()); - return ParseError::SyntaxError; - } - - if (delim_part.token().delim() == '=') { - simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch; - } else { - if (!attribute_tokens.has_next_token()) { - dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended part way through a match type."); - return ParseError::SyntaxError; - } - - auto const& delim_second_part = attribute_tokens.next_token(); - if (!delim_second_part.is_delim('=')) { - dbgln_if(CSS_PARSER_DEBUG, "Expected a double delim for attribute comparison, got: '{}{}'", delim_part.to_debug_string(), delim_second_part.to_debug_string()); - return ParseError::SyntaxError; - } - switch (delim_part.token().delim()) { - case '~': - simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsWord; - break; - case '*': - simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsString; - break; - case '|': - simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment; - break; - case '^': - simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithString; - break; - case '$': - simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::EndsWithString; - break; - default: - attribute_tokens.reconsume_current_input_token(); - } - } - - attribute_tokens.skip_whitespace(); - if (!attribute_tokens.has_next_token()) { - dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended without a value to match."); - return ParseError::SyntaxError; - } - - auto const& value_part = attribute_tokens.next_token(); - if (!value_part.is(Token::Type::Ident) && !value_part.is(Token::Type::String)) { - dbgln_if(CSS_PARSER_DEBUG, "Expected a string or ident for the value to match attribute against, got: '{}'", value_part.to_debug_string()); - return ParseError::SyntaxError; - } - auto value_string_view = value_part.token().is(Token::Type::Ident) ? value_part.token().ident() : value_part.token().string(); - simple_selector.attribute().value = String::from_utf8(value_string_view).release_value_but_fixme_should_propagate_errors(); - - attribute_tokens.skip_whitespace(); - // Handle case-sensitivity suffixes. https://www.w3.org/TR/selectors-4/#attribute-case - if (attribute_tokens.has_next_token()) { - auto const& case_sensitivity_part = attribute_tokens.next_token(); - if (case_sensitivity_part.is(Token::Type::Ident)) { - auto case_sensitivity = case_sensitivity_part.token().ident(); - if (case_sensitivity.equals_ignoring_ascii_case("i"sv)) { - simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch; - } else if (case_sensitivity.equals_ignoring_ascii_case("s"sv)) { - simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseSensitiveMatch; - } else { - dbgln_if(CSS_PARSER_DEBUG, "Expected a \"i\" or \"s\" attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string()); - return ParseError::SyntaxError; - } - } else { - dbgln_if(CSS_PARSER_DEBUG, "Expected an attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string()); - return ParseError::SyntaxError; - } - } - - if (attribute_tokens.has_next_token()) { - dbgln_if(CSS_PARSER_DEBUG, "Was not expecting anything else inside attribute selector."); - return ParseError::SyntaxError; - } - - return simple_selector; -} - -Parser::ParseErrorOr Parser::parse_pseudo_simple_selector(TokenStream& tokens) -{ - auto peek_token_ends_selector = [&]() -> bool { - auto const& value = tokens.peek_token(); - return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma)); - }; - - if (peek_token_ends_selector()) - return ParseError::SyntaxError; - - bool is_pseudo = false; - if (tokens.peek_token().is(Token::Type::Colon)) { - is_pseudo = true; - tokens.next_token(); - if (peek_token_ends_selector()) - return ParseError::SyntaxError; - } - - if (is_pseudo) { - auto const& name_token = tokens.next_token(); - if (!name_token.is(Token::Type::Ident)) { - dbgln_if(CSS_PARSER_DEBUG, "Expected an ident for pseudo-element, got: '{}'", name_token.to_debug_string()); - return ParseError::SyntaxError; - } - - auto pseudo_name = name_token.token().ident(); - auto pseudo_element = pseudo_element_from_string(pseudo_name); - - // Note: We allow the "ignored" -webkit prefix here for -webkit-progress-bar/-webkit-progress-bar - if (!pseudo_element.has_value() && has_ignored_vendor_prefix(pseudo_name)) - return ParseError::IncludesIgnoredVendorPrefix; - - if (!pseudo_element.has_value()) { - dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-element: '::{}'", pseudo_name); - return ParseError::SyntaxError; - } - - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoElement, - .value = pseudo_element.value() - }; - } - - if (peek_token_ends_selector()) - return ParseError::SyntaxError; - - auto const& pseudo_class_token = tokens.next_token(); - - if (pseudo_class_token.is(Token::Type::Ident)) { - auto pseudo_name = pseudo_class_token.token().ident(); - if (has_ignored_vendor_prefix(pseudo_name)) - return ParseError::IncludesIgnoredVendorPrefix; - - auto make_pseudo_class_selector = [](auto pseudo_class) { - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoClass, - .value = Selector::SimpleSelector::PseudoClassSelector { .type = pseudo_class } - }; - }; - - if (auto pseudo_class = pseudo_class_from_string(pseudo_name); pseudo_class.has_value()) { - if (!pseudo_class_metadata(pseudo_class.value()).is_valid_as_identifier) { - dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is only valid as a function", pseudo_name); - return ParseError::SyntaxError; - } - return make_pseudo_class_selector(pseudo_class.value()); - } - - // Single-colon syntax allowed for ::after, ::before, ::first-letter and ::first-line for compatibility. - // https://www.w3.org/TR/selectors/#pseudo-element-syntax - if (auto pseudo_element = pseudo_element_from_string(pseudo_name); pseudo_element.has_value()) { - switch (pseudo_element.value()) { - case Selector::PseudoElement::After: - case Selector::PseudoElement::Before: - case Selector::PseudoElement::FirstLetter: - case Selector::PseudoElement::FirstLine: - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoElement, - .value = pseudo_element.value() - }; - default: - break; - } - } - - dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class: ':{}'", pseudo_name); - return ParseError::SyntaxError; - } - - if (pseudo_class_token.is_function()) { - auto parse_nth_child_selector = [this](auto pseudo_class, Vector const& function_values, bool allow_of = false) -> ParseErrorOr { - auto tokens = TokenStream(function_values); - auto nth_child_pattern = parse_a_n_plus_b_pattern(tokens); - if (!nth_child_pattern.has_value()) { - dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid An+B format for {}", pseudo_class_name(pseudo_class)); - return ParseError::SyntaxError; - } - - tokens.skip_whitespace(); - if (!tokens.has_next_token()) { - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoClass, - .value = Selector::SimpleSelector::PseudoClassSelector { - .type = pseudo_class, - .nth_child_pattern = nth_child_pattern.release_value() } - }; - } - - if (!allow_of) - return ParseError::SyntaxError; - - // Parse the `of ` syntax - auto const& maybe_of = tokens.next_token(); - if (!(maybe_of.is(Token::Type::Ident) && maybe_of.token().ident().equals_ignoring_ascii_case("of"sv))) - return ParseError::SyntaxError; - - tokens.skip_whitespace(); - auto selector_list = TRY(parse_a_selector_list(tokens, SelectorType::Standalone)); - - tokens.skip_whitespace(); - if (tokens.has_next_token()) - return ParseError::SyntaxError; - - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoClass, - .value = Selector::SimpleSelector::PseudoClassSelector { - .type = pseudo_class, - .nth_child_pattern = nth_child_pattern.release_value(), - .argument_selector_list = move(selector_list) } - }; - }; - - auto const& pseudo_function = pseudo_class_token.function(); - auto maybe_pseudo_class = pseudo_class_from_string(pseudo_function.name()); - if (!maybe_pseudo_class.has_value()) { - dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class function: ':{}'()", pseudo_function.name()); - return ParseError::SyntaxError; - } - auto pseudo_class = maybe_pseudo_class.value(); - auto metadata = pseudo_class_metadata(pseudo_class); - - if (!metadata.is_valid_as_function) { - dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is not valid as a function", pseudo_function.name()); - return ParseError::SyntaxError; - } - - if (pseudo_function.values().is_empty()) { - dbgln_if(CSS_PARSER_DEBUG, "Empty :{}() selector", pseudo_function.name()); - return ParseError::SyntaxError; - } - - switch (metadata.parameter_type) { - case PseudoClassMetadata::ParameterType::ANPlusB: - return parse_nth_child_selector(pseudo_class, pseudo_function.values(), false); - case PseudoClassMetadata::ParameterType::ANPlusBOf: - return parse_nth_child_selector(pseudo_class, pseudo_function.values(), true); - case PseudoClassMetadata::ParameterType::CompoundSelector: { - auto function_token_stream = TokenStream(pseudo_function.values()); - auto compound_selector_or_error = parse_compound_selector(function_token_stream); - if (compound_selector_or_error.is_error() || !compound_selector_or_error.value().has_value()) { - dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a compound selector", pseudo_function.name()); - return ParseError::SyntaxError; - } - - Vector compound_selectors { compound_selector_or_error.release_value().release_value() }; - auto selector = Selector::create(move(compound_selectors)); - - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoClass, - .value = Selector::SimpleSelector::PseudoClassSelector { - .type = pseudo_class, - .argument_selector_list = { move(selector) } } - }; - } - case PseudoClassMetadata::ParameterType::ForgivingSelectorList: { - auto function_token_stream = TokenStream(pseudo_function.values()); - // NOTE: Because it's forgiving, even complete garbage will parse OK as an empty selector-list. - auto argument_selector_list = MUST(parse_a_selector_list(function_token_stream, SelectorType::Standalone, SelectorParsingMode::Forgiving)); - - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoClass, - .value = Selector::SimpleSelector::PseudoClassSelector { - .type = pseudo_class, - .argument_selector_list = move(argument_selector_list) } - }; - } - case PseudoClassMetadata::ParameterType::LanguageRanges: { - Vector languages; - auto function_token_stream = TokenStream(pseudo_function.values()); - auto language_token_lists = parse_a_comma_separated_list_of_component_values(function_token_stream); - - for (auto language_token_list : language_token_lists) { - auto language_token_stream = TokenStream(language_token_list); - language_token_stream.skip_whitespace(); - auto language_token = language_token_stream.next_token(); - if (!(language_token.is(Token::Type::Ident) || language_token.is(Token::Type::String))) { - dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - not a string/ident", pseudo_function.name()); - return ParseError::SyntaxError; - } - - auto language_string = language_token.is(Token::Type::String) ? language_token.token().string() : language_token.token().ident(); - languages.append(MUST(FlyString::from_utf8(language_string))); - - language_token_stream.skip_whitespace(); - if (language_token_stream.has_next_token()) { - dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - trailing tokens", pseudo_function.name()); - return ParseError::SyntaxError; - } - } - - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoClass, - .value = Selector::SimpleSelector::PseudoClassSelector { - .type = pseudo_class, - .languages = move(languages) } - }; - } - case PseudoClassMetadata::ParameterType::SelectorList: { - auto function_token_stream = TokenStream(pseudo_function.values()); - auto not_selector = TRY(parse_a_selector_list(function_token_stream, SelectorType::Standalone)); - - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::PseudoClass, - .value = Selector::SimpleSelector::PseudoClassSelector { - .type = pseudo_class, - .argument_selector_list = move(not_selector) } - }; - } - case PseudoClassMetadata::ParameterType::None: - // `None` means this is not a function-type pseudo-class, so this state should be impossible. - VERIFY_NOT_REACHED(); - } - } - dbgln_if(CSS_PARSER_DEBUG, "Unexpected Block in pseudo-class name, expected a function or identifier. '{}'", pseudo_class_token.to_debug_string()); - return ParseError::SyntaxError; -} - -Parser::ParseErrorOr> Parser::parse_simple_selector(TokenStream& tokens) -{ - auto peek_token_ends_selector = [&]() -> bool { - auto const& value = tokens.peek_token(); - return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma)); - }; - - if (peek_token_ends_selector()) - return Optional {}; - - // Handle universal and tag-name types together, since both can be namespaced - if (auto qualified_name = parse_selector_qualified_name(tokens, AllowWildcardName::Yes); qualified_name.has_value()) { - if (qualified_name->name.name == "*"sv) { - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::Universal, - .value = qualified_name.release_value(), - }; - } - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::TagName, - .value = qualified_name.release_value(), - }; - } - - auto const& first_value = tokens.next_token(); - - if (first_value.is(Token::Type::Delim)) { - u32 delim = first_value.token().delim(); - switch (delim) { - case '*': - // Handled already - VERIFY_NOT_REACHED(); - case '.': { - if (peek_token_ends_selector()) - return ParseError::SyntaxError; - - auto const& class_name_value = tokens.next_token(); - if (!class_name_value.is(Token::Type::Ident)) { - dbgln_if(CSS_PARSER_DEBUG, "Expected an ident after '.', got: {}", class_name_value.to_debug_string()); - return ParseError::SyntaxError; - } - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::Class, - .value = Selector::SimpleSelector::Name { FlyString::from_utf8(class_name_value.token().ident()).release_value_but_fixme_should_propagate_errors() } - }; - } - case '>': - case '+': - case '~': - case '|': - // Whitespace is not required between the compound-selector and a combinator. - // So, if we see a combinator, return that this compound-selector is done, instead of a syntax error. - tokens.reconsume_current_input_token(); - return Optional {}; - default: - dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!"); - return ParseError::SyntaxError; - } - } - - if (first_value.is(Token::Type::Hash)) { - if (first_value.token().hash_type() != Token::HashType::Id) { - dbgln_if(CSS_PARSER_DEBUG, "Selector contains hash token that is not an id: {}", first_value.to_debug_string()); - return ParseError::SyntaxError; - } - return Selector::SimpleSelector { - .type = Selector::SimpleSelector::Type::Id, - .value = Selector::SimpleSelector::Name { FlyString::from_utf8(first_value.token().hash_value()).release_value_but_fixme_should_propagate_errors() } - }; - } - - if (first_value.is_block() && first_value.block().is_square()) - return TRY(parse_attribute_simple_selector(first_value)); - - if (first_value.is(Token::Type::Colon)) - return TRY(parse_pseudo_simple_selector(tokens)); - - dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!"); - return ParseError::SyntaxError; -} - RefPtr Parser::parse_as_supports() { return parse_a_supports(m_token_stream); @@ -7323,308 +6693,6 @@ ErrorOr Parser::parse_css_value_for_properties(Readonl return PropertyAndValue { property_ids.first(), nullptr }; } -Optional Parser::parse_a_n_plus_b_pattern(TokenStream& values) -{ - auto transaction = values.begin_transaction(); - auto syntax_error = [&]() -> Optional { - if constexpr (CSS_PARSER_DEBUG) { - dbgln_if(CSS_PARSER_DEBUG, "Invalid An+B value:"); - values.dump_all_tokens(); - } - return {}; - }; - - auto is_n = [](ComponentValue const& value) -> bool { - return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_ascii_case("n"sv); - }; - auto is_ndash = [](ComponentValue const& value) -> bool { - return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_ascii_case("n-"sv); - }; - auto is_dashn = [](ComponentValue const& value) -> bool { - return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_ascii_case("-n"sv); - }; - auto is_dashndash = [](ComponentValue const& value) -> bool { - return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_ascii_case("-n-"sv); - }; - auto is_sign = [](ComponentValue const& value) -> bool { - return value.is(Token::Type::Delim) && (value.token().delim() == '+' || value.token().delim() == '-'); - }; - auto is_n_dimension = [](ComponentValue const& value) -> bool { - if (!value.is(Token::Type::Dimension)) - return false; - if (!value.token().number().is_integer()) - return false; - if (!value.token().dimension_unit().equals_ignoring_ascii_case("n"sv)) - return false; - return true; - }; - auto is_ndash_dimension = [](ComponentValue const& value) -> bool { - if (!value.is(Token::Type::Dimension)) - return false; - if (!value.token().number().is_integer()) - return false; - if (!value.token().dimension_unit().equals_ignoring_ascii_case("n-"sv)) - return false; - return true; - }; - auto is_ndashdigit_dimension = [](ComponentValue const& value) -> bool { - if (!value.is(Token::Type::Dimension)) - return false; - if (!value.token().number().is_integer()) - return false; - auto dimension_unit = value.token().dimension_unit(); - if (!dimension_unit.starts_with("n-"sv, CaseSensitivity::CaseInsensitive)) - return false; - for (size_t i = 2; i < dimension_unit.length(); ++i) { - if (!is_ascii_digit(dimension_unit[i])) - return false; - } - return true; - }; - auto is_ndashdigit_ident = [](ComponentValue const& value) -> bool { - if (!value.is(Token::Type::Ident)) - return false; - auto ident = value.token().ident(); - if (!ident.starts_with("n-"sv, CaseSensitivity::CaseInsensitive)) - return false; - for (size_t i = 2; i < ident.length(); ++i) { - if (!is_ascii_digit(ident[i])) - return false; - } - return true; - }; - auto is_dashndashdigit_ident = [](ComponentValue const& value) -> bool { - if (!value.is(Token::Type::Ident)) - return false; - auto ident = value.token().ident(); - if (!ident.starts_with("-n-"sv, CaseSensitivity::CaseInsensitive)) - return false; - if (ident.length() == 3) - return false; - for (size_t i = 3; i < ident.length(); ++i) { - if (!is_ascii_digit(ident[i])) - return false; - } - return true; - }; - auto is_integer = [](ComponentValue const& value) -> bool { - return value.is(Token::Type::Number) && value.token().number().is_integer(); - }; - auto is_signed_integer = [](ComponentValue const& value) -> bool { - return value.is(Token::Type::Number) && value.token().number().is_integer_with_explicit_sign(); - }; - auto is_signless_integer = [](ComponentValue const& value) -> bool { - return value.is(Token::Type::Number) && !value.token().number().is_integer_with_explicit_sign(); - }; - - // https://www.w3.org/TR/css-syntax-3/#the-anb-type - // Unfortunately these can't be in the same order as in the spec. - - values.skip_whitespace(); - auto const& first_value = values.next_token(); - - // odd | even - if (first_value.is(Token::Type::Ident)) { - auto ident = first_value.token().ident(); - if (ident.equals_ignoring_ascii_case("odd"sv)) { - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { 2, 1 }; - } - if (ident.equals_ignoring_ascii_case("even"sv)) { - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { 2, 0 }; - } - } - // - if (is_integer(first_value)) { - int b = first_value.token().to_integer(); - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { 0, b }; - } - // - // - // ['+' | '-'] - if (is_n_dimension(first_value)) { - int a = first_value.token().dimension_value_int(); - values.skip_whitespace(); - - // - if (is_signed_integer(values.peek_token())) { - int b = values.next_token().token().to_integer(); - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { a, b }; - } - - // ['+' | '-'] - { - auto child_transaction = transaction.create_child(); - auto const& second_value = values.next_token(); - values.skip_whitespace(); - auto const& third_value = values.next_token(); - - if (is_sign(second_value) && is_signless_integer(third_value)) { - int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1); - child_transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { a, b }; - } - } - - // - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { a, 0 }; - } - // - if (is_ndash_dimension(first_value)) { - values.skip_whitespace(); - auto const& second_value = values.next_token(); - if (is_signless_integer(second_value)) { - int a = first_value.token().dimension_value_int(); - int b = -second_value.token().to_integer(); - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { a, b }; - } - - return syntax_error(); - } - // - if (is_ndashdigit_dimension(first_value)) { - auto const& dimension = first_value.token(); - int a = dimension.dimension_value_int(); - auto maybe_b = dimension.dimension_unit().substring_view(1).to_int(); - if (maybe_b.has_value()) { - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { a, maybe_b.value() }; - } - - return syntax_error(); - } - // - if (is_dashndashdigit_ident(first_value)) { - auto maybe_b = first_value.token().ident().substring_view(2).to_int(); - if (maybe_b.has_value()) { - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { -1, maybe_b.value() }; - } - - return syntax_error(); - } - // -n - // -n - // -n ['+' | '-'] - if (is_dashn(first_value)) { - values.skip_whitespace(); - - // -n - if (is_signed_integer(values.peek_token())) { - int b = values.next_token().token().to_integer(); - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { -1, b }; - } - - // -n ['+' | '-'] - { - auto child_transaction = transaction.create_child(); - auto const& second_value = values.next_token(); - values.skip_whitespace(); - auto const& third_value = values.next_token(); - - if (is_sign(second_value) && is_signless_integer(third_value)) { - int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1); - child_transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { -1, b }; - } - } - - // -n - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { -1, 0 }; - } - // -n- - if (is_dashndash(first_value)) { - values.skip_whitespace(); - auto const& second_value = values.next_token(); - if (is_signless_integer(second_value)) { - int b = -second_value.token().to_integer(); - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { -1, b }; - } - - return syntax_error(); - } - - // All that's left now are these: - // '+'?† n - // '+'?† n - // '+'?† n ['+' | '-'] - // '+'?† n- - // '+'?† - // In all of these cases, the + is optional, and has no effect. - // So, we just skip the +, and carry on. - if (!first_value.is_delim('+')) { - values.reconsume_current_input_token(); - // We do *not* skip whitespace here. - } - - auto const& first_after_plus = values.next_token(); - // '+'?† n - // '+'?† n - // '+'?† n ['+' | '-'] - if (is_n(first_after_plus)) { - values.skip_whitespace(); - - // '+'?† n - if (is_signed_integer(values.peek_token())) { - int b = values.next_token().token().to_integer(); - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { 1, b }; - } - - // '+'?† n ['+' | '-'] - { - auto child_transaction = transaction.create_child(); - auto const& second_value = values.next_token(); - values.skip_whitespace(); - auto const& third_value = values.next_token(); - - if (is_sign(second_value) && is_signless_integer(third_value)) { - int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1); - child_transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { 1, b }; - } - } - - // '+'?† n - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { 1, 0 }; - } - - // '+'?† n- - if (is_ndash(first_after_plus)) { - values.skip_whitespace(); - auto const& second_value = values.next_token(); - if (is_signless_integer(second_value)) { - int b = -second_value.token().to_integer(); - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { 1, b }; - } - - return syntax_error(); - } - - // '+'?† - if (is_ndashdigit_ident(first_after_plus)) { - auto maybe_b = first_after_plus.token().ident().substring_view(1).to_int(); - if (maybe_b.has_value()) { - transaction.commit(); - return Selector::SimpleSelector::ANPlusBPattern { 1, maybe_b.value() }; - } - - return syntax_error(); - } - - return syntax_error(); -} - class UnparsedCalculationNode final : public CalculationNode { public: static ErrorOr> create(ComponentValue component_value) diff --git a/Userland/Libraries/LibWeb/CSS/Parser/SelectorParsing.cpp b/Userland/Libraries/LibWeb/CSS/Parser/SelectorParsing.cpp new file mode 100644 index 0000000000..1a5c8b297d --- /dev/null +++ b/Userland/Libraries/LibWeb/CSS/Parser/SelectorParsing.cpp @@ -0,0 +1,949 @@ +/* + * Copyright (c) 2018-2022, Andreas Kling + * Copyright (c) 2020-2021, the SerenityOS developers. + * Copyright (c) 2021-2023, Sam Atkins + * Copyright (c) 2021, Tobias Christiansen + * Copyright (c) 2022, MacDue + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace Web::CSS::Parser { + +Optional Parser::parse_as_selector(SelectorParsingMode parsing_mode) +{ + auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, parsing_mode); + if (!selector_list.is_error()) + return selector_list.release_value(); + + return {}; +} + +Optional Parser::parse_as_relative_selector(SelectorParsingMode parsing_mode) +{ + auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Relative, parsing_mode); + if (!selector_list.is_error()) + return selector_list.release_value(); + + return {}; +} + +template +Parser::ParseErrorOr Parser::parse_a_selector_list(TokenStream& tokens, SelectorType mode, SelectorParsingMode parsing_mode) +{ + auto comma_separated_lists = parse_a_comma_separated_list_of_component_values(tokens); + + Vector> selectors; + for (auto& selector_parts : comma_separated_lists) { + auto stream = TokenStream(selector_parts); + auto selector = parse_complex_selector(stream, mode); + if (selector.is_error()) { + if (parsing_mode == SelectorParsingMode::Forgiving) + continue; + return selector.error(); + } + selectors.append(selector.release_value()); + } + + if (selectors.is_empty() && parsing_mode != SelectorParsingMode::Forgiving) + return ParseError::SyntaxError; + + return selectors; +} +template Parser::ParseErrorOr Parser::parse_a_selector_list(TokenStream&, SelectorType, SelectorParsingMode); +template Parser::ParseErrorOr Parser::parse_a_selector_list(TokenStream&, SelectorType, SelectorParsingMode); + +Parser::ParseErrorOr> Parser::parse_complex_selector(TokenStream& tokens, SelectorType mode) +{ + Vector compound_selectors; + + auto first_selector = TRY(parse_compound_selector(tokens)); + if (!first_selector.has_value()) + return ParseError::SyntaxError; + + if (mode == SelectorType::Standalone) { + if (first_selector->combinator != Selector::Combinator::Descendant) + return ParseError::SyntaxError; + first_selector->combinator = Selector::Combinator::None; + } + compound_selectors.append(first_selector.release_value()); + + while (tokens.has_next_token()) { + auto compound_selector = TRY(parse_compound_selector(tokens)); + if (!compound_selector.has_value()) + break; + compound_selectors.append(compound_selector.release_value()); + } + + if (compound_selectors.is_empty()) + return ParseError::SyntaxError; + + return Selector::create(move(compound_selectors)); +} + +Parser::ParseErrorOr> Parser::parse_compound_selector(TokenStream& tokens) +{ + tokens.skip_whitespace(); + + auto combinator = parse_selector_combinator(tokens).value_or(Selector::Combinator::Descendant); + + tokens.skip_whitespace(); + + Vector simple_selectors; + + while (tokens.has_next_token()) { + auto component = TRY(parse_simple_selector(tokens)); + if (!component.has_value()) + break; + simple_selectors.append(component.release_value()); + } + + if (simple_selectors.is_empty()) + return Optional {}; + + return Selector::CompoundSelector { combinator, move(simple_selectors) }; +} + +Optional Parser::parse_selector_combinator(TokenStream& tokens) +{ + auto const& current_value = tokens.next_token(); + if (current_value.is(Token::Type::Delim)) { + switch (current_value.token().delim()) { + case '>': + return Selector::Combinator::ImmediateChild; + case '+': + return Selector::Combinator::NextSibling; + case '~': + return Selector::Combinator::SubsequentSibling; + case '|': { + auto const& next = tokens.peek_token(); + if (next.is(Token::Type::EndOfFile)) + return {}; + + if (next.is_delim('|')) { + tokens.next_token(); + return Selector::Combinator::Column; + } + } + } + } + + tokens.reconsume_current_input_token(); + return {}; +} + +Optional Parser::parse_selector_qualified_name(TokenStream& tokens, AllowWildcardName allow_wildcard_name) +{ + auto is_name = [](ComponentValue const& token) { + return token.is_delim('*') || token.is(Token::Type::Ident); + }; + auto get_name = [](ComponentValue const& token) { + if (token.is_delim('*')) + return FlyString::from_utf8("*"sv); + return FlyString::from_utf8(token.token().ident()); + }; + + // There are 3 possibilities here: + // (Where and are either an or a `*` delim) + // 1) `|` + // 2) `|` + // 3) `` + // Whitespace is forbidden between any of these parts. https://www.w3.org/TR/selectors-4/#white-space + + auto transaction = tokens.begin_transaction(); + + auto first_token = tokens.next_token(); + if (first_token.is_delim('|')) { + // Case 1: `|` + if (is_name(tokens.peek_token())) { + auto name_token = tokens.next_token(); + + if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*')) + return {}; + + transaction.commit(); + return Selector::SimpleSelector::QualifiedName { + .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::None, + .name = get_name(name_token).release_value_but_fixme_should_propagate_errors(), + }; + } + return {}; + } + + if (!is_name(first_token)) + return {}; + + if (tokens.peek_token().is_delim('|') && is_name(tokens.peek_token(1))) { + // Case 2: `|` + (void)tokens.next_token(); // `|` + auto namespace_ = get_name(first_token).release_value_but_fixme_should_propagate_errors(); + auto name = get_name(tokens.next_token()).release_value_but_fixme_should_propagate_errors(); + + if (allow_wildcard_name == AllowWildcardName::No && name == "*"sv) + return {}; + + auto namespace_type = namespace_ == "*"sv + ? Selector::SimpleSelector::QualifiedName::NamespaceType::Any + : Selector::SimpleSelector::QualifiedName::NamespaceType::Named; + + transaction.commit(); + return Selector::SimpleSelector::QualifiedName { + .namespace_type = namespace_type, + .namespace_ = namespace_, + .name = name, + }; + } + + // Case 3: `` + auto& name_token = first_token; + if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*')) + return {}; + + transaction.commit(); + return Selector::SimpleSelector::QualifiedName { + .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::Default, + .name = get_name(name_token).release_value_but_fixme_should_propagate_errors(), + }; +} + +Parser::ParseErrorOr Parser::parse_attribute_simple_selector(ComponentValue const& first_value) +{ + auto attribute_tokens = TokenStream { first_value.block().values() }; + + attribute_tokens.skip_whitespace(); + + if (!attribute_tokens.has_next_token()) { + dbgln_if(CSS_PARSER_DEBUG, "CSS attribute selector is empty!"); + return ParseError::SyntaxError; + } + + auto maybe_qualified_name = parse_selector_qualified_name(attribute_tokens, AllowWildcardName::No); + if (!maybe_qualified_name.has_value()) { + dbgln_if(CSS_PARSER_DEBUG, "Expected qualified-name for attribute name, got: '{}'", attribute_tokens.peek_token().to_debug_string()); + return ParseError::SyntaxError; + } + + Selector::SimpleSelector simple_selector { + .type = Selector::SimpleSelector::Type::Attribute, + .value = Selector::SimpleSelector::Attribute { + .match_type = Selector::SimpleSelector::Attribute::MatchType::HasAttribute, + // FIXME: Case-sensitivity is defined by the document language. + // HTML is insensitive with attribute names, and our code generally assumes + // they are converted to lowercase, so we do that here too. If we want to be + // correct with XML later, we'll need to keep the original case and then do + // a case-insensitive compare later. + .qualified_name = maybe_qualified_name.release_value(), + .case_type = Selector::SimpleSelector::Attribute::CaseType::DefaultMatch, + } + }; + + attribute_tokens.skip_whitespace(); + if (!attribute_tokens.has_next_token()) + return simple_selector; + + auto const& delim_part = attribute_tokens.next_token(); + if (!delim_part.is(Token::Type::Delim)) { + dbgln_if(CSS_PARSER_DEBUG, "Expected a delim for attribute comparison, got: '{}'", delim_part.to_debug_string()); + return ParseError::SyntaxError; + } + + if (delim_part.token().delim() == '=') { + simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch; + } else { + if (!attribute_tokens.has_next_token()) { + dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended part way through a match type."); + return ParseError::SyntaxError; + } + + auto const& delim_second_part = attribute_tokens.next_token(); + if (!delim_second_part.is_delim('=')) { + dbgln_if(CSS_PARSER_DEBUG, "Expected a double delim for attribute comparison, got: '{}{}'", delim_part.to_debug_string(), delim_second_part.to_debug_string()); + return ParseError::SyntaxError; + } + switch (delim_part.token().delim()) { + case '~': + simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsWord; + break; + case '*': + simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsString; + break; + case '|': + simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment; + break; + case '^': + simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithString; + break; + case '$': + simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::EndsWithString; + break; + default: + attribute_tokens.reconsume_current_input_token(); + } + } + + attribute_tokens.skip_whitespace(); + if (!attribute_tokens.has_next_token()) { + dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended without a value to match."); + return ParseError::SyntaxError; + } + + auto const& value_part = attribute_tokens.next_token(); + if (!value_part.is(Token::Type::Ident) && !value_part.is(Token::Type::String)) { + dbgln_if(CSS_PARSER_DEBUG, "Expected a string or ident for the value to match attribute against, got: '{}'", value_part.to_debug_string()); + return ParseError::SyntaxError; + } + auto value_string_view = value_part.token().is(Token::Type::Ident) ? value_part.token().ident() : value_part.token().string(); + simple_selector.attribute().value = String::from_utf8(value_string_view).release_value_but_fixme_should_propagate_errors(); + + attribute_tokens.skip_whitespace(); + // Handle case-sensitivity suffixes. https://www.w3.org/TR/selectors-4/#attribute-case + if (attribute_tokens.has_next_token()) { + auto const& case_sensitivity_part = attribute_tokens.next_token(); + if (case_sensitivity_part.is(Token::Type::Ident)) { + auto case_sensitivity = case_sensitivity_part.token().ident(); + if (case_sensitivity.equals_ignoring_ascii_case("i"sv)) { + simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch; + } else if (case_sensitivity.equals_ignoring_ascii_case("s"sv)) { + simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseSensitiveMatch; + } else { + dbgln_if(CSS_PARSER_DEBUG, "Expected a \"i\" or \"s\" attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string()); + return ParseError::SyntaxError; + } + } else { + dbgln_if(CSS_PARSER_DEBUG, "Expected an attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string()); + return ParseError::SyntaxError; + } + } + + if (attribute_tokens.has_next_token()) { + dbgln_if(CSS_PARSER_DEBUG, "Was not expecting anything else inside attribute selector."); + return ParseError::SyntaxError; + } + + return simple_selector; +} + +Parser::ParseErrorOr Parser::parse_pseudo_simple_selector(TokenStream& tokens) +{ + auto peek_token_ends_selector = [&]() -> bool { + auto const& value = tokens.peek_token(); + return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma)); + }; + + if (peek_token_ends_selector()) + return ParseError::SyntaxError; + + bool is_pseudo = false; + if (tokens.peek_token().is(Token::Type::Colon)) { + is_pseudo = true; + tokens.next_token(); + if (peek_token_ends_selector()) + return ParseError::SyntaxError; + } + + if (is_pseudo) { + auto const& name_token = tokens.next_token(); + if (!name_token.is(Token::Type::Ident)) { + dbgln_if(CSS_PARSER_DEBUG, "Expected an ident for pseudo-element, got: '{}'", name_token.to_debug_string()); + return ParseError::SyntaxError; + } + + auto pseudo_name = name_token.token().ident(); + auto pseudo_element = pseudo_element_from_string(pseudo_name); + + // Note: We allow the "ignored" -webkit prefix here for -webkit-progress-bar/-webkit-progress-bar + if (!pseudo_element.has_value() && has_ignored_vendor_prefix(pseudo_name)) + return ParseError::IncludesIgnoredVendorPrefix; + + if (!pseudo_element.has_value()) { + dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-element: '::{}'", pseudo_name); + return ParseError::SyntaxError; + } + + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoElement, + .value = pseudo_element.value() + }; + } + + if (peek_token_ends_selector()) + return ParseError::SyntaxError; + + auto const& pseudo_class_token = tokens.next_token(); + + if (pseudo_class_token.is(Token::Type::Ident)) { + auto pseudo_name = pseudo_class_token.token().ident(); + if (has_ignored_vendor_prefix(pseudo_name)) + return ParseError::IncludesIgnoredVendorPrefix; + + auto make_pseudo_class_selector = [](auto pseudo_class) { + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoClass, + .value = Selector::SimpleSelector::PseudoClassSelector { .type = pseudo_class } + }; + }; + + if (auto pseudo_class = pseudo_class_from_string(pseudo_name); pseudo_class.has_value()) { + if (!pseudo_class_metadata(pseudo_class.value()).is_valid_as_identifier) { + dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is only valid as a function", pseudo_name); + return ParseError::SyntaxError; + } + return make_pseudo_class_selector(pseudo_class.value()); + } + + // Single-colon syntax allowed for ::after, ::before, ::first-letter and ::first-line for compatibility. + // https://www.w3.org/TR/selectors/#pseudo-element-syntax + if (auto pseudo_element = pseudo_element_from_string(pseudo_name); pseudo_element.has_value()) { + switch (pseudo_element.value()) { + case Selector::PseudoElement::After: + case Selector::PseudoElement::Before: + case Selector::PseudoElement::FirstLetter: + case Selector::PseudoElement::FirstLine: + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoElement, + .value = pseudo_element.value() + }; + default: + break; + } + } + + dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class: ':{}'", pseudo_name); + return ParseError::SyntaxError; + } + + if (pseudo_class_token.is_function()) { + auto parse_nth_child_selector = [this](auto pseudo_class, Vector const& function_values, bool allow_of = false) -> ParseErrorOr { + auto tokens = TokenStream(function_values); + auto nth_child_pattern = parse_a_n_plus_b_pattern(tokens); + if (!nth_child_pattern.has_value()) { + dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid An+B format for {}", pseudo_class_name(pseudo_class)); + return ParseError::SyntaxError; + } + + tokens.skip_whitespace(); + if (!tokens.has_next_token()) { + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoClass, + .value = Selector::SimpleSelector::PseudoClassSelector { + .type = pseudo_class, + .nth_child_pattern = nth_child_pattern.release_value() } + }; + } + + if (!allow_of) + return ParseError::SyntaxError; + + // Parse the `of ` syntax + auto const& maybe_of = tokens.next_token(); + if (!(maybe_of.is(Token::Type::Ident) && maybe_of.token().ident().equals_ignoring_ascii_case("of"sv))) + return ParseError::SyntaxError; + + tokens.skip_whitespace(); + auto selector_list = TRY(parse_a_selector_list(tokens, SelectorType::Standalone)); + + tokens.skip_whitespace(); + if (tokens.has_next_token()) + return ParseError::SyntaxError; + + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoClass, + .value = Selector::SimpleSelector::PseudoClassSelector { + .type = pseudo_class, + .nth_child_pattern = nth_child_pattern.release_value(), + .argument_selector_list = move(selector_list) } + }; + }; + + auto const& pseudo_function = pseudo_class_token.function(); + auto maybe_pseudo_class = pseudo_class_from_string(pseudo_function.name()); + if (!maybe_pseudo_class.has_value()) { + dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class function: ':{}'()", pseudo_function.name()); + return ParseError::SyntaxError; + } + auto pseudo_class = maybe_pseudo_class.value(); + auto metadata = pseudo_class_metadata(pseudo_class); + + if (!metadata.is_valid_as_function) { + dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is not valid as a function", pseudo_function.name()); + return ParseError::SyntaxError; + } + + if (pseudo_function.values().is_empty()) { + dbgln_if(CSS_PARSER_DEBUG, "Empty :{}() selector", pseudo_function.name()); + return ParseError::SyntaxError; + } + + switch (metadata.parameter_type) { + case PseudoClassMetadata::ParameterType::ANPlusB: + return parse_nth_child_selector(pseudo_class, pseudo_function.values(), false); + case PseudoClassMetadata::ParameterType::ANPlusBOf: + return parse_nth_child_selector(pseudo_class, pseudo_function.values(), true); + case PseudoClassMetadata::ParameterType::CompoundSelector: { + auto function_token_stream = TokenStream(pseudo_function.values()); + auto compound_selector_or_error = parse_compound_selector(function_token_stream); + if (compound_selector_or_error.is_error() || !compound_selector_or_error.value().has_value()) { + dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a compound selector", pseudo_function.name()); + return ParseError::SyntaxError; + } + + Vector compound_selectors { compound_selector_or_error.release_value().release_value() }; + auto selector = Selector::create(move(compound_selectors)); + + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoClass, + .value = Selector::SimpleSelector::PseudoClassSelector { + .type = pseudo_class, + .argument_selector_list = { move(selector) } } + }; + } + case PseudoClassMetadata::ParameterType::ForgivingSelectorList: { + auto function_token_stream = TokenStream(pseudo_function.values()); + // NOTE: Because it's forgiving, even complete garbage will parse OK as an empty selector-list. + auto argument_selector_list = MUST(parse_a_selector_list(function_token_stream, SelectorType::Standalone, SelectorParsingMode::Forgiving)); + + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoClass, + .value = Selector::SimpleSelector::PseudoClassSelector { + .type = pseudo_class, + .argument_selector_list = move(argument_selector_list) } + }; + } + case PseudoClassMetadata::ParameterType::LanguageRanges: { + Vector languages; + auto function_token_stream = TokenStream(pseudo_function.values()); + auto language_token_lists = parse_a_comma_separated_list_of_component_values(function_token_stream); + + for (auto language_token_list : language_token_lists) { + auto language_token_stream = TokenStream(language_token_list); + language_token_stream.skip_whitespace(); + auto language_token = language_token_stream.next_token(); + if (!(language_token.is(Token::Type::Ident) || language_token.is(Token::Type::String))) { + dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - not a string/ident", pseudo_function.name()); + return ParseError::SyntaxError; + } + + auto language_string = language_token.is(Token::Type::String) ? language_token.token().string() : language_token.token().ident(); + languages.append(MUST(FlyString::from_utf8(language_string))); + + language_token_stream.skip_whitespace(); + if (language_token_stream.has_next_token()) { + dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - trailing tokens", pseudo_function.name()); + return ParseError::SyntaxError; + } + } + + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoClass, + .value = Selector::SimpleSelector::PseudoClassSelector { + .type = pseudo_class, + .languages = move(languages) } + }; + } + case PseudoClassMetadata::ParameterType::SelectorList: { + auto function_token_stream = TokenStream(pseudo_function.values()); + auto not_selector = TRY(parse_a_selector_list(function_token_stream, SelectorType::Standalone)); + + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::PseudoClass, + .value = Selector::SimpleSelector::PseudoClassSelector { + .type = pseudo_class, + .argument_selector_list = move(not_selector) } + }; + } + case PseudoClassMetadata::ParameterType::None: + // `None` means this is not a function-type pseudo-class, so this state should be impossible. + VERIFY_NOT_REACHED(); + } + } + dbgln_if(CSS_PARSER_DEBUG, "Unexpected Block in pseudo-class name, expected a function or identifier. '{}'", pseudo_class_token.to_debug_string()); + return ParseError::SyntaxError; +} + +Parser::ParseErrorOr> Parser::parse_simple_selector(TokenStream& tokens) +{ + auto peek_token_ends_selector = [&]() -> bool { + auto const& value = tokens.peek_token(); + return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma)); + }; + + if (peek_token_ends_selector()) + return Optional {}; + + // Handle universal and tag-name types together, since both can be namespaced + if (auto qualified_name = parse_selector_qualified_name(tokens, AllowWildcardName::Yes); qualified_name.has_value()) { + if (qualified_name->name.name == "*"sv) { + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::Universal, + .value = qualified_name.release_value(), + }; + } + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::TagName, + .value = qualified_name.release_value(), + }; + } + + auto const& first_value = tokens.next_token(); + + if (first_value.is(Token::Type::Delim)) { + u32 delim = first_value.token().delim(); + switch (delim) { + case '*': + // Handled already + VERIFY_NOT_REACHED(); + case '.': { + if (peek_token_ends_selector()) + return ParseError::SyntaxError; + + auto const& class_name_value = tokens.next_token(); + if (!class_name_value.is(Token::Type::Ident)) { + dbgln_if(CSS_PARSER_DEBUG, "Expected an ident after '.', got: {}", class_name_value.to_debug_string()); + return ParseError::SyntaxError; + } + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::Class, + .value = Selector::SimpleSelector::Name { FlyString::from_utf8(class_name_value.token().ident()).release_value_but_fixme_should_propagate_errors() } + }; + } + case '>': + case '+': + case '~': + case '|': + // Whitespace is not required between the compound-selector and a combinator. + // So, if we see a combinator, return that this compound-selector is done, instead of a syntax error. + tokens.reconsume_current_input_token(); + return Optional {}; + default: + dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!"); + return ParseError::SyntaxError; + } + } + + if (first_value.is(Token::Type::Hash)) { + if (first_value.token().hash_type() != Token::HashType::Id) { + dbgln_if(CSS_PARSER_DEBUG, "Selector contains hash token that is not an id: {}", first_value.to_debug_string()); + return ParseError::SyntaxError; + } + return Selector::SimpleSelector { + .type = Selector::SimpleSelector::Type::Id, + .value = Selector::SimpleSelector::Name { FlyString::from_utf8(first_value.token().hash_value()).release_value_but_fixme_should_propagate_errors() } + }; + } + + if (first_value.is_block() && first_value.block().is_square()) + return TRY(parse_attribute_simple_selector(first_value)); + + if (first_value.is(Token::Type::Colon)) + return TRY(parse_pseudo_simple_selector(tokens)); + + dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!"); + return ParseError::SyntaxError; +} + +Optional Parser::parse_a_n_plus_b_pattern(TokenStream& values) +{ + auto transaction = values.begin_transaction(); + auto syntax_error = [&]() -> Optional { + if constexpr (CSS_PARSER_DEBUG) { + dbgln_if(CSS_PARSER_DEBUG, "Invalid An+B value:"); + values.dump_all_tokens(); + } + return {}; + }; + + auto is_n = [](ComponentValue const& value) -> bool { + return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_ascii_case("n"sv); + }; + auto is_ndash = [](ComponentValue const& value) -> bool { + return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_ascii_case("n-"sv); + }; + auto is_dashn = [](ComponentValue const& value) -> bool { + return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_ascii_case("-n"sv); + }; + auto is_dashndash = [](ComponentValue const& value) -> bool { + return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_ascii_case("-n-"sv); + }; + auto is_sign = [](ComponentValue const& value) -> bool { + return value.is(Token::Type::Delim) && (value.token().delim() == '+' || value.token().delim() == '-'); + }; + auto is_n_dimension = [](ComponentValue const& value) -> bool { + if (!value.is(Token::Type::Dimension)) + return false; + if (!value.token().number().is_integer()) + return false; + if (!value.token().dimension_unit().equals_ignoring_ascii_case("n"sv)) + return false; + return true; + }; + auto is_ndash_dimension = [](ComponentValue const& value) -> bool { + if (!value.is(Token::Type::Dimension)) + return false; + if (!value.token().number().is_integer()) + return false; + if (!value.token().dimension_unit().equals_ignoring_ascii_case("n-"sv)) + return false; + return true; + }; + auto is_ndashdigit_dimension = [](ComponentValue const& value) -> bool { + if (!value.is(Token::Type::Dimension)) + return false; + if (!value.token().number().is_integer()) + return false; + auto dimension_unit = value.token().dimension_unit(); + if (!dimension_unit.starts_with("n-"sv, CaseSensitivity::CaseInsensitive)) + return false; + for (size_t i = 2; i < dimension_unit.length(); ++i) { + if (!is_ascii_digit(dimension_unit[i])) + return false; + } + return true; + }; + auto is_ndashdigit_ident = [](ComponentValue const& value) -> bool { + if (!value.is(Token::Type::Ident)) + return false; + auto ident = value.token().ident(); + if (!ident.starts_with("n-"sv, CaseSensitivity::CaseInsensitive)) + return false; + for (size_t i = 2; i < ident.length(); ++i) { + if (!is_ascii_digit(ident[i])) + return false; + } + return true; + }; + auto is_dashndashdigit_ident = [](ComponentValue const& value) -> bool { + if (!value.is(Token::Type::Ident)) + return false; + auto ident = value.token().ident(); + if (!ident.starts_with("-n-"sv, CaseSensitivity::CaseInsensitive)) + return false; + if (ident.length() == 3) + return false; + for (size_t i = 3; i < ident.length(); ++i) { + if (!is_ascii_digit(ident[i])) + return false; + } + return true; + }; + auto is_integer = [](ComponentValue const& value) -> bool { + return value.is(Token::Type::Number) && value.token().number().is_integer(); + }; + auto is_signed_integer = [](ComponentValue const& value) -> bool { + return value.is(Token::Type::Number) && value.token().number().is_integer_with_explicit_sign(); + }; + auto is_signless_integer = [](ComponentValue const& value) -> bool { + return value.is(Token::Type::Number) && !value.token().number().is_integer_with_explicit_sign(); + }; + + // https://www.w3.org/TR/css-syntax-3/#the-anb-type + // Unfortunately these can't be in the same order as in the spec. + + values.skip_whitespace(); + auto const& first_value = values.next_token(); + + // odd | even + if (first_value.is(Token::Type::Ident)) { + auto ident = first_value.token().ident(); + if (ident.equals_ignoring_ascii_case("odd"sv)) { + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { 2, 1 }; + } + if (ident.equals_ignoring_ascii_case("even"sv)) { + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { 2, 0 }; + } + } + // + if (is_integer(first_value)) { + int b = first_value.token().to_integer(); + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { 0, b }; + } + // + // + // ['+' | '-'] + if (is_n_dimension(first_value)) { + int a = first_value.token().dimension_value_int(); + values.skip_whitespace(); + + // + if (is_signed_integer(values.peek_token())) { + int b = values.next_token().token().to_integer(); + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { a, b }; + } + + // ['+' | '-'] + { + auto child_transaction = transaction.create_child(); + auto const& second_value = values.next_token(); + values.skip_whitespace(); + auto const& third_value = values.next_token(); + + if (is_sign(second_value) && is_signless_integer(third_value)) { + int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1); + child_transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { a, b }; + } + } + + // + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { a, 0 }; + } + // + if (is_ndash_dimension(first_value)) { + values.skip_whitespace(); + auto const& second_value = values.next_token(); + if (is_signless_integer(second_value)) { + int a = first_value.token().dimension_value_int(); + int b = -second_value.token().to_integer(); + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { a, b }; + } + + return syntax_error(); + } + // + if (is_ndashdigit_dimension(first_value)) { + auto const& dimension = first_value.token(); + int a = dimension.dimension_value_int(); + auto maybe_b = dimension.dimension_unit().substring_view(1).to_int(); + if (maybe_b.has_value()) { + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { a, maybe_b.value() }; + } + + return syntax_error(); + } + // + if (is_dashndashdigit_ident(first_value)) { + auto maybe_b = first_value.token().ident().substring_view(2).to_int(); + if (maybe_b.has_value()) { + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { -1, maybe_b.value() }; + } + + return syntax_error(); + } + // -n + // -n + // -n ['+' | '-'] + if (is_dashn(first_value)) { + values.skip_whitespace(); + + // -n + if (is_signed_integer(values.peek_token())) { + int b = values.next_token().token().to_integer(); + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { -1, b }; + } + + // -n ['+' | '-'] + { + auto child_transaction = transaction.create_child(); + auto const& second_value = values.next_token(); + values.skip_whitespace(); + auto const& third_value = values.next_token(); + + if (is_sign(second_value) && is_signless_integer(third_value)) { + int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1); + child_transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { -1, b }; + } + } + + // -n + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { -1, 0 }; + } + // -n- + if (is_dashndash(first_value)) { + values.skip_whitespace(); + auto const& second_value = values.next_token(); + if (is_signless_integer(second_value)) { + int b = -second_value.token().to_integer(); + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { -1, b }; + } + + return syntax_error(); + } + + // All that's left now are these: + // '+'?† n + // '+'?† n + // '+'?† n ['+' | '-'] + // '+'?† n- + // '+'?† + // In all of these cases, the + is optional, and has no effect. + // So, we just skip the +, and carry on. + if (!first_value.is_delim('+')) { + values.reconsume_current_input_token(); + // We do *not* skip whitespace here. + } + + auto const& first_after_plus = values.next_token(); + // '+'?† n + // '+'?† n + // '+'?† n ['+' | '-'] + if (is_n(first_after_plus)) { + values.skip_whitespace(); + + // '+'?† n + if (is_signed_integer(values.peek_token())) { + int b = values.next_token().token().to_integer(); + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { 1, b }; + } + + // '+'?† n ['+' | '-'] + { + auto child_transaction = transaction.create_child(); + auto const& second_value = values.next_token(); + values.skip_whitespace(); + auto const& third_value = values.next_token(); + + if (is_sign(second_value) && is_signless_integer(third_value)) { + int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1); + child_transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { 1, b }; + } + } + + // '+'?† n + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { 1, 0 }; + } + + // '+'?† n- + if (is_ndash(first_after_plus)) { + values.skip_whitespace(); + auto const& second_value = values.next_token(); + if (is_signless_integer(second_value)) { + int b = -second_value.token().to_integer(); + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { 1, b }; + } + + return syntax_error(); + } + + // '+'?† + if (is_ndashdigit_ident(first_after_plus)) { + auto maybe_b = first_after_plus.token().ident().substring_view(1).to_int(); + if (maybe_b.has_value()) { + transaction.commit(); + return Selector::SimpleSelector::ANPlusBPattern { 1, maybe_b.value() }; + } + + return syntax_error(); + } + + return syntax_error(); +} + +}