From fb727332f9f15f84020f7e7b1ea123df8b65ca0c Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Tue, 4 Jul 2023 09:50:47 +0200 Subject: [PATCH] LibWeb: Decode linked style sheets before parsing them This fixes an issue where a BOM at the head of a style sheet would be passed verbatim to the parser, which would then interpret it as an ident token and (after some confusion) fail to parse the first rule, but then carry on with the rest of the sheet. --- .../css/style-sheet-with-byte-order-mark.txt | 1 + .../css/style-sheet-with-byte-order-mark.css | 7 ++++ .../css/style-sheet-with-byte-order-mark.html | 1 + .../Libraries/LibWeb/HTML/HTMLLinkElement.cpp | 42 +++++++++++++++---- 4 files changed, 42 insertions(+), 9 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/css/style-sheet-with-byte-order-mark.txt create mode 100644 Tests/LibWeb/Text/input/css/style-sheet-with-byte-order-mark.css create mode 100644 Tests/LibWeb/Text/input/css/style-sheet-with-byte-order-mark.html diff --git a/Tests/LibWeb/Text/expected/css/style-sheet-with-byte-order-mark.txt b/Tests/LibWeb/Text/expected/css/style-sheet-with-byte-order-mark.txt new file mode 100644 index 0000000000..ba69e3e181 --- /dev/null +++ b/Tests/LibWeb/Text/expected/css/style-sheet-with-byte-order-mark.txt @@ -0,0 +1 @@ +PASS \ No newline at end of file diff --git a/Tests/LibWeb/Text/input/css/style-sheet-with-byte-order-mark.css b/Tests/LibWeb/Text/input/css/style-sheet-with-byte-order-mark.css new file mode 100644 index 0000000000..9ab1f3a39c --- /dev/null +++ b/Tests/LibWeb/Text/input/css/style-sheet-with-byte-order-mark.css @@ -0,0 +1,7 @@ +.pass { + display: block; +} + +div { + display: none; +} diff --git a/Tests/LibWeb/Text/input/css/style-sheet-with-byte-order-mark.html b/Tests/LibWeb/Text/input/css/style-sheet-with-byte-order-mark.html new file mode 100644 index 0000000000..08274e9a83 --- /dev/null +++ b/Tests/LibWeb/Text/input/css/style-sheet-with-byte-order-mark.html @@ -0,0 +1 @@ +
PASS diff --git a/Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp b/Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp index d4512833b7..be46859861 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLLinkElement.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -341,18 +342,41 @@ void HTMLLinkElement::process_stylesheet_resource(bool success, Fetch::Infrastru // The CSS environment encoding is the result of running the following steps: [CSSSYNTAX] // 1. If the element has a charset attribute, get an encoding from that attribute's value. If that succeeds, return the resulting encoding. [ENCODING] // 2. Otherwise, return the document's character encoding. [DOM] - m_loaded_style_sheet = parse_css_stylesheet(CSS::Parser::ParsingContext(document(), *response.url()), body_bytes.template get()); - if (m_loaded_style_sheet) { - m_loaded_style_sheet->set_owner_node(this); - m_loaded_style_sheet->set_media(attribute(HTML::AttributeNames::media)); - document().style_sheets().add_sheet(*m_loaded_style_sheet); + DeprecatedString encoding; + if (auto charset = attribute(HTML::AttributeNames::charset); !charset.is_null()) + encoding = charset; + else + encoding = document().encoding_or_default(); + + auto decoder = TextCodec::decoder_for(encoding); + + if (!decoder.has_value()) { + // If we don't support the encoding yet, let's error out instead of trying to decode it as something it's most likely not. 
+ dbgln("FIXME: Style sheet encoding '{}' is not supported yet", encoding); + dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::error).release_value_but_fixme_should_propagate_errors()); } else { - dbgln_if(CSS_LOADER_DEBUG, "HTMLLinkElement: Failed to parse stylesheet: {}", resource()->url()); - } + auto const& encoded_string = body_bytes.get(); + auto maybe_decoded_string = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, encoded_string); + if (maybe_decoded_string.is_error()) { + dbgln("Style sheet {} claimed to be '{}' but decoding failed", response.url().value_or(AK::URL()), encoding); + dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::error).release_value_but_fixme_should_propagate_errors()); + } else { + auto const decoded_string = maybe_decoded_string.release_value(); + m_loaded_style_sheet = parse_css_stylesheet(CSS::Parser::ParsingContext(document(), *response.url()), decoded_string); - // 2. Fire an event named load at el. - dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::load).release_value_but_fixme_should_propagate_errors()); + if (m_loaded_style_sheet) { + m_loaded_style_sheet->set_owner_node(this); + m_loaded_style_sheet->set_media(attribute(HTML::AttributeNames::media)); + document().style_sheets().add_sheet(*m_loaded_style_sheet); + } else { + dbgln_if(CSS_LOADER_DEBUG, "HTMLLinkElement: Failed to parse stylesheet: {}", resource()->url()); + } + + // 2. Fire an event named load at el. + dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::load).release_value_but_fixme_should_propagate_errors()); + } + } } // 5. Otherwise, fire an event named error at el. else {