From 02ea85da2c5ca3322da4f78f24041faaaf125b2d Mon Sep 17 00:00:00 2001
From: Kemal Zebari
Date: Thu, 30 Nov 2023 12:45:58 -0800
Subject: [PATCH] LibWeb/MimeSniff: Add sniffing in an image context

Implement the "sniffing in an image context" rules of the MIME Sniffing
Standard (https://mimesniff.spec.whatwg.org/#sniffing-in-an-image-context)
as Resource::rules_for_sniffing_images_specifically(), which is selected
through the new SniffingContext::Image value.

When no_sniff is set and a supplied MIME type is available, the
context-specific sniffing step is skipped and the supplied MIME type
becomes the computed MIME type (non-standard).

The image signature fixtures in TestMimeSniff are factored out into
set_image_type_mappings() so that two test cases can share them.
---
 Tests/LibWeb/TestMimeSniff.cpp                | 61 +++++++++++++++++--
 .../Libraries/LibWeb/MimeSniff/Resource.cpp   | 42 +++++++++++++
 .../Libraries/LibWeb/MimeSniff/Resource.h     |  4 +-
 3 files changed, 100 insertions(+), 7 deletions(-)

diff --git a/Tests/LibWeb/TestMimeSniff.cpp b/Tests/LibWeb/TestMimeSniff.cpp
index e269e9dbf8..a4be94e66c 100644
--- a/Tests/LibWeb/TestMimeSniff.cpp
+++ b/Tests/LibWeb/TestMimeSniff.cpp
@@ -14,6 +14,15 @@ TEST_CASE(determine_computed_mime_type_given_no_sniff_is_set)
     auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = mime_type, .no_sniff = true }));
 
     EXPECT_EQ("text/html"sv, MUST(computed_mime_type.serialized()));
+
+    // Cover the edge case in the context-specific sniffing algorithm.
+    computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration {
+        .sniffing_context = Web::MimeSniff::SniffingContext::Image,
+        .supplied_type = mime_type,
+        .no_sniff = true,
+    }));
+
+    EXPECT_EQ("text/html"sv, MUST(computed_mime_type.serialized()));
 }
 
 TEST_CASE(determine_computed_mime_type_given_no_sniff_is_unset)
@@ -31,6 +40,16 @@ TEST_CASE(determine_computed_mime_type_given_no_sniff_is_unset)
     EXPECT_EQ(xml_mime_type, MUST(computed_mime_type.serialized()));
 }
 
+static void set_image_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
+{
+    mime_type_to_headers_map.set("image/x-icon"sv, { "\x00\x00\x01\x00"sv, "\x00\x00\x02\x00"sv });
+    mime_type_to_headers_map.set("image/bmp"sv, { "BM"sv });
+    mime_type_to_headers_map.set("image/gif"sv, { "GIF87a"sv, "GIF89a"sv });
+    mime_type_to_headers_map.set("image/webp"sv, { "RIFF\x00\x00\x00\x00WEBPVP"sv });
+    mime_type_to_headers_map.set("image/png"sv, { "\x89PNG\x0D\x0A\x1A\x0A"sv });
+    mime_type_to_headers_map.set("image/jpeg"sv, { "\xFF\xD8\xFF"sv });
+}
+
 TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_context)
 {
     HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
@@ -64,12 +83,9 @@ TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_contex
                                                      "\xEF\xBB\xBF\x00"sv,
                                                      "Hello world!"sv,
                                                  });
-    mime_type_to_headers_map.set("image/x-icon"sv, { "\x00\x00\x01\x00"sv, "\x00\x00\x02\x00"sv });
-    mime_type_to_headers_map.set("image/bmp"sv, { "BM"sv });
-    mime_type_to_headers_map.set("image/gif"sv, { "GIF87a"sv, "GIF89a"sv });
-    mime_type_to_headers_map.set("image/webp"sv, { "RIFF\x00\x00\x00\x00WEBPVP"sv });
-    mime_type_to_headers_map.set("image/png"sv, { "\x89PNG\x0D\x0A\x1A\x0A"sv });
-    mime_type_to_headers_map.set("image/jpeg"sv, { "\xFF\xD8\xFF"sv });
+
+    set_image_type_mappings(mime_type_to_headers_map);
+
     mime_type_to_headers_map.set("audio/aiff"sv, { "FORM\x00\x00\x00\x00\x41IFF"sv });
     mime_type_to_headers_map.set("audio/mpeg"sv, { "ID3"sv });
     mime_type_to_headers_map.set("application/ogg"sv, { "OggS\x00"sv });
@@ -110,3 +126,36 @@ TEST_CASE(compute_mime_type_given_unknown_supplied_type)
         EXPECT_EQ("text/html"sv, computed_mime_type.essence());
     }
 }
+
+TEST_CASE(determine_computed_mime_type_in_image_sniffing_context)
+{
+    // Cover case where supplied type is an XML MIME type.
+    auto mime_type = "application/rss+xml"sv;
+    auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(mime_type)).release_value();
+    auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image, .supplied_type = supplied_type }));
+
+    EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
+
+    HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
+
+    set_image_type_mappings(mime_type_to_headers_map);
+
+    // Also consider a resource that is not an image.
+    mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
+
+    for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
+        mime_type = mime_type_to_headers.key;
+
+        for (auto const& header : mime_type_to_headers.value) {
+            computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image }));
+            EXPECT_EQ(mime_type, computed_mime_type.essence());
+        }
+    }
+
+    // Cover case where we aren't dealing with an image MIME type.
+    mime_type = "text/html"sv;
+    supplied_type = MUST(Web::MimeSniff::MimeType::parse("text/html"sv)).release_value();
+    computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image, .supplied_type = supplied_type }));
+
+    EXPECT_EQ(mime_type, computed_mime_type.essence());
+}
diff --git a/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp b/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
index c6a194db88..60e693ab90 100644
--- a/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
+++ b/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
@@ -531,6 +531,48 @@ ErrorOr<void> Resource::context_specific_sniffing_algorithm(SniffingContext snif
         return mime_type_sniffing_algorithm();
     }
 
+    // NOTE: Non-standard but if the client expects us to not sniff, we shouldn't be doing any
+    //       context-specific sniffing if we don't have to.
+    if (m_no_sniff && m_supplied_mime_type.has_value()) {
+        m_computed_mime_type = m_supplied_mime_type.value();
+        return {};
+    }
+
+    if (sniffing_context == SniffingContext::Image)
+        return rules_for_sniffing_images_specifically();
+
+    return {};
+}
+
+// https://mimesniff.spec.whatwg.org/#sniffing-in-an-image-context
+ErrorOr<void> Resource::rules_for_sniffing_images_specifically()
+{
+    // 1. If the supplied MIME type is an XML MIME type, the computed MIME type is the supplied MIME type.
+    //    Abort these steps.
+    // NOTE: Non-standard but due to the mime type detection algorithm we need this sanity check.
+    if (m_supplied_mime_type.has_value() && m_supplied_mime_type->is_xml()) {
+        m_computed_mime_type = m_supplied_mime_type.value();
+        return {};
+    }
+
+    // 2. Let image-type-matched be the result of executing the image type pattern matching algorithm with
+    //    the resource header as the byte sequence to be matched.
+    auto image_type_matched = TRY(match_an_image_type_pattern(resource_header()));
+
+    // 3. If image-type-matched is not undefined, the computed MIME type is image-type-matched.
+    //    Abort these steps.
+    if (image_type_matched.has_value()) {
+        m_computed_mime_type = image_type_matched.release_value();
+        return {};
+    }
+
+    // 4. The computed MIME type is the supplied MIME type.
+    // NOTE: Non-standard but due to the mime type detection algorithm we need this sanity check.
+    if (m_supplied_mime_type.has_value()) {
+        m_computed_mime_type = m_supplied_mime_type.value();
+    }
+
+    // NOTE: Non-standard but if the supplied mime type is undefined, we use computed mime type's default value.
     return {};
 }
 
diff --git a/Userland/Libraries/LibWeb/MimeSniff/Resource.h b/Userland/Libraries/LibWeb/MimeSniff/Resource.h
index c3499f6da2..826e49a4d8 100644
--- a/Userland/Libraries/LibWeb/MimeSniff/Resource.h
+++ b/Userland/Libraries/LibWeb/MimeSniff/Resource.h
@@ -12,7 +12,8 @@ namespace Web::MimeSniff {
 
 enum class SniffingContext {
     None,
-    Browsing
+    Browsing,
+    Image
 };
 
 struct SniffingConfiguration {
@@ -40,6 +41,7 @@ private:
     ErrorOr<void> supplied_mime_type_detection_algorithm(StringView scheme, Optional<MimeType> supplied_type);
     ErrorOr<void> mime_type_sniffing_algorithm();
     ErrorOr<void> context_specific_sniffing_algorithm(SniffingContext sniffing_context);
+    ErrorOr<void> rules_for_sniffing_images_specifically();
 
     // https://mimesniff.spec.whatwg.org/#supplied-mime-type
     // A supplied MIME type, the MIME type determined by the supplied MIME type detection algorithm.