From f6d3ea33fa8881a99a4617cb90ee3238fef726ff Mon Sep 17 00:00:00 2001
From: Kemal Zebari
Date: Sat, 14 Oct 2023 20:02:30 -0700
Subject: [PATCH] LibWeb/MimeSniff: Add match an archive type pattern algorithm

---
 Tests/LibWeb/TestMimeSniff.cpp                |  3 ++
 .../Libraries/LibWeb/MimeSniff/Resource.cpp   | 33 ++++++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/Tests/LibWeb/TestMimeSniff.cpp b/Tests/LibWeb/TestMimeSniff.cpp
index 446f963cf9..49572bce34 100644
--- a/Tests/LibWeb/TestMimeSniff.cpp
+++ b/Tests/LibWeb/TestMimeSniff.cpp
@@ -76,6 +76,9 @@ TEST_CASE(compute_unknown_mime_type)
     mime_type_to_headers_map.set("audio/midi"sv, { "MThd\x00\x00\x00\x06"sv });
     mime_type_to_headers_map.set("video/avi"sv, { "RIFF\x00\x00\x00\x00\x41\x56\x49\x20"sv });
     mime_type_to_headers_map.set("audio/wave"sv, { "RIFF\x00\x00\x00\x00WAVE"sv });
+    mime_type_to_headers_map.set("application/x-gzip"sv, { "\x1F\x8B\x08"sv });
+    mime_type_to_headers_map.set("application/zip"sv, { "PK\x03\x04"sv });
+    mime_type_to_headers_map.set("application/x-rar-compressed"sv, { "Rar\x20\x1A\x07\x00"sv });
 
     for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
         auto mime_type = mime_type_to_headers.key;
diff --git a/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp b/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
index 5e65da5da6..d578bcd565 100644
--- a/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
+++ b/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
@@ -182,6 +182,36 @@ ErrorOr<Optional<MimeType>> match_an_audio_or_video_type_pattern(ReadonlyBytes i
     return OptionalNone {};
 }
 
+// https://mimesniff.spec.whatwg.org/#matching-an-archive-type-pattern
+ErrorOr<Optional<MimeType>> match_an_archive_type_pattern(ReadonlyBytes input)
+{
+    // 1. Execute the following steps for each row row in the following table:
+    static Array<BytePatternTableRow, 3> constexpr pattern_table {
+        // The GZIP archive signature.
+        BytePatternTableRow { "\x1F\x8B\x08"sv, "\xFF\xFF\xFF"sv, no_ignored_bytes, "application/x-gzip"sv },
+
+        // The string "PK" followed by ETX EOT, the ZIP archive signature.
+        BytePatternTableRow { "\x50\x4B\x03\x04"sv, "\xFF\xFF\xFF\xFF"sv, no_ignored_bytes, "application/zip"sv },
+
+        // The string "Rar " followed by SUB BEL NUL, the RAR archive signature.
+        BytePatternTableRow { "\x52\x61\x72\x20\x1A\x07\x00"sv, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF"sv, no_ignored_bytes, "application/x-rar-compressed"sv },
+    };
+
+    for (auto const& row : pattern_table) {
+        // 1. Let patternMatched be the result of the pattern matching algorithm given input, the
+        //    value in the first column of row, the value in the second column of row, and the
+        //    value in the third column of row.
+        auto pattern_matched = pattern_matching_algorithm(input, row.byte_pattern.bytes(), row.pattern_mask.bytes(), row.ignored_leading_bytes);
+
+        // 2. If patternMatched is true, return the value in the fourth column of row.
+        if (pattern_matched)
+            return MimeType::parse(row.mime_type);
+    }
+
+    // 2. Return undefined.
+    return OptionalNone {};
+}
+
 // https://mimesniff.spec.whatwg.org/#rules-for-identifying-an-unknown-mime-type
 ErrorOr<MimeType> rules_for_identifying_an_unknown_mime_type(Resource const& resource, bool sniff_scriptable = false)
 {
@@ -314,7 +344,8 @@ ErrorOr<MimeType> rules_for_identifying_an_unknown_mime_type(Resource const& res
     if (matched_type.has_value())
         return matched_type.release_value();
 
-    // FIXME: 7. Set matchedType to the result of executing the archive type pattern matching algorithm given resource’s resource header.
+    // 7. Set matchedType to the result of executing the archive type pattern matching algorithm given resource’s resource header.
+    matched_type = TRY(match_an_archive_type_pattern(resource.resource_header()));
 
     // 8. If matchedType is not undefined, return matchedType.
     if (matched_type.has_value())