diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp index b26d81bd46..662189f2e4 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp @@ -2705,13 +2705,8 @@ Optional Parser::parse_ratio(TokenStream& tokens) // https://www.w3.org/TR/css-syntax-3/#urange-syntax Optional Parser::parse_unicode_range(TokenStream& tokens) { + auto transaction = tokens.begin_transaction(); tokens.skip_whitespace(); - auto position = tokens.position(); - - auto error = [&]() -> Optional { - tokens.rewind_to_position(position); - return {}; - }; // = // u '+' '?'* | @@ -2723,9 +2718,9 @@ Optional Parser::parse_unicode_range(TokenStream& // (All with no whitespace in between tokens.) // NOTE: Parsing this is different from usual. We take these steps: - // 1. Match the grammar above against the tokens. - // 2. Convert the matching tokens back into a string using their original representation. - // 3. Then, parse that string according to the spec algorithm. + // 1. Match the grammar above against the tokens, concatenating them into a string using their original representation. + // 2. Then, parse that string according to the spec algorithm. + // Step 2 is performed by calling the other parse_unicode_range() overload. auto is_question_mark = [](ComponentValue const& component_value) { return component_value.is(Token::Type::Delim) && component_value.token().delim() == '?'; @@ -2738,78 +2733,102 @@ Optional Parser::parse_unicode_range(TokenStream& || component_value.is(Token::Type::Whitespace); }; + auto representation_of = [](ComponentValue const& component_value) { + // FIXME: This should use the "representation", that is, the original text that produced the token. + // See: https://www.w3.org/TR/css-syntax-3/#representation + // We don't have a way to get that, so instead, we're relying on Token::to_string(), and + // handling specific cases where that's not enough. + // Integers like `+34` get serialized as `34`, so manually include the `+` sign. + if (component_value.is(Token::Type::Number) && component_value.token().number().is_integer_with_explicit_sign()) { + auto int_value = component_value.token().number().integer_value(); + return String::formatted("{:+}", int_value); + } + + return component_value.to_string(); + }; + + auto create_unicode_range = [&](StringView text, auto& local_transaction) -> Optional { + auto maybe_unicode_range = parse_unicode_range(text); + if (maybe_unicode_range.has_value()) { + local_transaction.commit(); + transaction.commit(); + } + return maybe_unicode_range; + }; + // All options start with 'u'/'U'. auto& u = tokens.next_token(); if (!(u.is(Token::Type::Ident) && u.token().ident().equals_ignoring_case("u"))) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: does not start with 'u'"); - return error(); + return {}; } auto& second_token = tokens.next_token(); - auto after_second_token = tokens.position(); // u '+' '?'* | // u '+' '?'+ if (second_token.is(Token::Type::Delim) && second_token.token().delim() == '+') { + auto local_transaction = tokens.begin_transaction(); + StringBuilder string_builder; + string_builder.append(representation_of(second_token)); + auto& third_token = tokens.next_token(); if (third_token.is(Token::Type::Ident) || is_question_mark(third_token)) { + string_builder.append(representation_of(third_token)); while (is_question_mark(tokens.peek_token())) - tokens.next_token(); + string_builder.append(representation_of(tokens.next_token())); if (is_ending_token(tokens.peek_token())) - return create_unicode_range_from_tokens(tokens, position, tokens.position()); + return create_unicode_range(string_builder.string_view(), local_transaction); } - - tokens.rewind_to_position(after_second_token); } // u '?'* if (second_token.is(Token::Type::Dimension)) { + auto local_transaction = tokens.begin_transaction(); + StringBuilder string_builder; + string_builder.append(representation_of(second_token)); while (is_question_mark(tokens.peek_token())) - tokens.next_token(); + string_builder.append(representation_of(tokens.next_token())); if (is_ending_token(tokens.peek_token())) - return create_unicode_range_from_tokens(tokens, position, tokens.position()); - - tokens.rewind_to_position(after_second_token); + return create_unicode_range(string_builder.string_view(), local_transaction); } // u '?'* | // u | // u if (second_token.is(Token::Type::Number)) { + auto local_transaction = tokens.begin_transaction(); + StringBuilder string_builder; + string_builder.append(representation_of(second_token)); + if (is_ending_token(tokens.peek_token())) - return create_unicode_range_from_tokens(tokens, position, tokens.position()); + return create_unicode_range(string_builder.string_view(), local_transaction); auto& third_token = tokens.next_token(); + string_builder.append(representation_of(third_token)); if (is_question_mark(third_token)) { while (is_question_mark(tokens.peek_token())) - tokens.next_token(); + string_builder.append(representation_of(tokens.next_token())); if (is_ending_token(tokens.peek_token())) - return create_unicode_range_from_tokens(tokens, position, tokens.position()); + return create_unicode_range(string_builder.string_view(), local_transaction); } else if (third_token.is(Token::Type::Dimension)) { if (is_ending_token(tokens.peek_token())) - return create_unicode_range_from_tokens(tokens, position, tokens.position()); + return create_unicode_range(string_builder.string_view(), local_transaction); } else if (third_token.is(Token::Type::Number)) { if (is_ending_token(tokens.peek_token())) - return create_unicode_range_from_tokens(tokens, position, tokens.position()); + return create_unicode_range(string_builder.string_view(), local_transaction); } - - tokens.rewind_to_position(after_second_token); } if constexpr (CSS_PARSER_DEBUG) { dbgln("CSSParser: Tokens did not match grammar."); tokens.dump_all_tokens(); } - return error(); + return {}; } -Optional Parser::create_unicode_range_from_tokens(TokenStream& tokens, int start_position, int end_position) +Optional Parser::parse_unicode_range(StringView text) { - auto error = [&]() -> Optional { - tokens.rewind_to_position(start_position); - return {}; - }; - auto make_valid_unicode_range = [&](u32 start_value, u32 end_value) -> Optional { // https://www.w3.org/TR/css-syntax-3/#maximum-allowed-code-point constexpr u32 maximum_allowed_code_point = 0x10FFFF; @@ -2819,13 +2838,13 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream is invalid and a syntax error. if (end_value > maximum_allowed_code_point) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: Invalid : end_value ({}) > maximum ({})", end_value, maximum_allowed_code_point); - return error(); + return {}; } // 2. If start value is greater than end value, the is invalid and a syntax error. if (start_value > end_value) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: Invalid : start_value ({}) > end_value ({})", start_value, end_value); - return error(); + return {}; } // 3. Otherwise, the represents a contiguous range of codepoints from start value to end value, inclusive. @@ -2834,26 +2853,7 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream= 0) - text_builder.append('+'); - text_builder.append(String::number(int_value)); - } else { - text_builder.append(token.to_string()); - } - } - auto text = text_builder.string_view(); + // NOTE: The concatenation is already done by the caller. GenericLexer lexer { text }; // 2. If the first character of text is U+002B PLUS SIGN, consume it. @@ -2862,7 +2862,7 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream was not '+'; got: '{}'", lexer.consume()); - return error(); + return {}; } // 3. Consume as many hex digits from text as possible. @@ -2874,7 +2874,7 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream 6) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: start value had {} digits/?s, expected between 1 and 6.", consumed_code_points); - return error(); + return {}; } StringView start_value_code_points { hex_digits.characters_without_null_termination(), consumed_code_points }; @@ -2884,7 +2884,7 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream invalid; had {} code points left over.", lexer.tell_remaining()); - return error(); + return {}; } // 2. Interpret the consumed code points as a hexadecimal number, @@ -2894,7 +2894,7 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream(start_value_string); if (!maybe_start_value.has_value()) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: ?-converted start value did not parse as hex number."); - return error(); + return {}; } u32 start_value = maybe_start_value.release_value(); @@ -2905,7 +2905,7 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream(end_value_string); if (!maybe_end_value.has_value()) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: ?-converted end value did not parse as hex number."); - return error(); + return {}; } u32 end_value = maybe_end_value.release_value(); @@ -2916,7 +2916,7 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream(start_value_code_points); if (!maybe_start_value.has_value()) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: start value did not parse as hex number."); - return error(); + return {}; } u32 start_value = maybe_start_value.release_value(); @@ -2932,7 +2932,7 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream, and this algorithm must exit. else { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: start and end values not separated by '-'."); - return error(); + return {}; } // 6. Consume as many hex digits as possible from text. @@ -2942,20 +2942,20 @@ Optional Parser::create_unicode_range_from_tokens(TokenStream, and this algorithm must exit. if (end_hex_digits.length() == 0 || end_hex_digits.length() > 6) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: end value had {} digits, expected between 1 and 6.", end_hex_digits.length()); - return error(); + return {}; } // If there are any code points left in text, this is an invalid , and this algorithm must exit. if (lexer.tell_remaining() != 0) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: invalid; had {} code points left over.", lexer.tell_remaining()); - return error(); + return {}; } // 7. Interpret the consumed code points as a hexadecimal number. This is the end value. auto maybe_end_value = AK::StringUtils::convert_to_uint_from_hex(end_hex_digits); if (!maybe_end_value.has_value()) { dbgln_if(CSS_PARSER_DEBUG, "CSSParser: end value did not parse as hex number."); - return error(); + return {}; } u32 end_value = maybe_end_value.release_value(); diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h index 6617ffaedd..d2a3e0cf43 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h @@ -310,7 +310,7 @@ private: Optional parse_length(ComponentValue const&); Optional parse_ratio(TokenStream&); Optional parse_unicode_range(TokenStream&); - Optional create_unicode_range_from_tokens(TokenStream&, int start_position, int end_position); + Optional parse_unicode_range(StringView); enum class AllowedDataUrlType { None,