1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 04:17:35 +00:00

LibWeb: Add rules for distinguishing if a resource is text or binary

Resolves a FIXME in MimeSniff::Resource, allowing us to determine
the computed MIME type when the supplied type is one of those sent by
older versions of Apache, which need special handling.
This commit is contained in:
Kemal Zebari 2023-12-07 21:27:25 -08:00 committed by Andrew Kaster
parent 0b7148e2a6
commit 5d14691149
3 changed files with 102 additions and 13 deletions

View file

@ -31,11 +31,13 @@ TEST_CASE(determine_computed_mime_type_given_no_sniff_is_unset)
auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
EXPECT_EQ("application/octet-stream"sv, MUST(computed_mime_type.serialized()));
}
// Make sure we cover the XML code path in the mime type sniffing algorithm:
// an XML supplied type ("application/rss+xml") is expected to be returned unchanged
// as the computed type, even though the sniffed payload is a single NUL byte.
TEST_CASE(determine_computed_mime_type_given_xml_mime_type_as_supplied_type)
{
auto xml_mime_type = "application/rss+xml"sv;
// NOTE(review): the next two statements assign to names that are only declared by the
// two `auto` statements after them. They look like the pre-change side of a diff shown
// next to its replacement - confirm before treating this block as compilable as-is.
supplied_type = MUST(Web::MimeSniff::MimeType::parse(xml_mime_type)).release_value();
computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(xml_mime_type)).release_value();
auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
// The serialized computed type must match the supplied XML type exactly.
EXPECT_EQ(xml_mime_type, MUST(computed_mime_type.serialized()));
}
@ -60,6 +62,53 @@ static void set_audio_or_video_type_mappings(HashMap<StringView, Vector<StringVi
mime_type_to_headers_map.set("audio/wave"sv, { "RIFF\x00\x00\x00\x00WAVE"sv });
}
// Registers, under the "text/plain" key, the byte sequences that the tests expect
// to be sniffed as plain text. Shared by several test cases so the list lives in one place.
static void set_text_plain_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
{
    Vector<StringView> text_plain_headers = {
        "\xFE\xFF\x00\x00"sv,
        "\xFF\xFE\x00\x00"sv,
        "\xEF\xBB\xBF\x00"sv,
        "Hello world!"sv,
    };
    mime_type_to_headers_map.set("text/plain"sv, move(text_plain_headers));
}
// Checks sniffing over the "http" scheme when the supplied type is one of the
// "Apache bug" MIME types, first for every such type and then for each outcome of the
// "rules for distinguishing if a resource is text or binary".
TEST_CASE(determine_computed_mime_type_given_supplied_type_that_is_an_apache_bug_mime_type)
{
    Vector<StringView> apache_bug_mime_types = {
        "text/plain"sv,
        "text/plain; charset=ISO-8859-1"sv,
        "text/plain; charset=iso-8859-1"sv,
        "text/plain; charset=UTF-8"sv
    };

    // Cover all Apache bug MIME types.
    for (auto const& type_string : apache_bug_mime_types) {
        auto parsed_type = MUST(Web::MimeSniff::MimeType::parse(type_string)).release_value();
        Web::MimeSniff::SniffingConfiguration const configuration { .scheme = "http"sv, .supplied_type = parsed_type };

        auto sniffed_type = MUST(Web::MimeSniff::Resource::sniff("Hello world!"sv.bytes(), configuration));
        EXPECT_EQ("text/plain"sv, MUST(sniffed_type.serialized()));
    }

    // Cover all code paths in "rules for distinguishing if a resource is text or binary".
    HashMap<StringView, Vector<StringView>> expected_type_to_payloads;
    expected_type_to_payloads.set("application/octet-stream"sv, { "\x00"sv });
    set_text_plain_type_mappings(expected_type_to_payloads);

    // Every payload below is sniffed with the same bare "text/plain" supplied type.
    auto text_plain_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "plain"_string));
    Web::MimeSniff::SniffingConfiguration const text_plain_configuration { .scheme = "http"sv, .supplied_type = text_plain_type };

    for (auto const& entry : expected_type_to_payloads) {
        auto expected_type = entry.key;
        for (auto const& payload : entry.value) {
            auto sniffed_type = MUST(Web::MimeSniff::Resource::sniff(payload.bytes(), text_plain_configuration));
            EXPECT_EQ(expected_type, MUST(sniffed_type.serialized()));
        }
    }
}
TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_context)
{
HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
@ -87,13 +136,8 @@ TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_contex
mime_type_to_headers_map.set("text/xml"sv, { "<?xml"sv });
mime_type_to_headers_map.set("application/pdf"sv, { "%PDF-"sv });
mime_type_to_headers_map.set("application/postscript"sv, { "%!PS-Adobe-"sv });
mime_type_to_headers_map.set("text/plain"sv, {
"\xFE\xFF\x00\x00"sv,
"\xFF\xFE\x00\x00"sv,
"\xEF\xBB\xBF\x00"sv,
"Hello world!"sv,
});
set_text_plain_type_mappings(mime_type_to_headers_map);
set_image_type_mappings(mime_type_to_headers_map);
set_audio_or_video_type_mappings(mime_type_to_headers_map);