From cf5f3a98741e531e26fe66e9793f265d633c2caa Mon Sep 17 00:00:00 2001 From: Valtteri Koskivuori Date: Mon, 3 May 2021 21:48:21 +0300 Subject: [PATCH] LibCore: Implement basic mime type guessing based on binary patterns This attempts to guess the mime-type from a given set of bytes from the start of a file. It only supports a few well-defined patterns for now, but it's a start! --- Userland/Libraries/LibCore/MimeData.cpp | 31 +++++++++++++++++++++++++ Userland/Libraries/LibCore/MimeData.h | 2 ++ 2 files changed, 33 insertions(+) diff --git a/Userland/Libraries/LibCore/MimeData.cpp b/Userland/Libraries/LibCore/MimeData.cpp index 3fa27eeda4..61fc261055 100644 --- a/Userland/Libraries/LibCore/MimeData.cpp +++ b/Userland/Libraries/LibCore/MimeData.cpp @@ -79,4 +79,35 @@ String guess_mime_type_based_on_filename(const StringView& path) return "text/plain"; } +#define ENUMERATE_HEADER_CONTENTS \ + __ENUMERATE_MIME_TYPE_HEADER(bmp, "image/bmp", 2, 'B', 'M') \ + __ENUMERATE_MIME_TYPE_HEADER(bzip2, "application/x-bzip2", 3, 'B', 'Z', 'h') \ + __ENUMERATE_MIME_TYPE_HEADER(elf, "extra/elf", 4, 0x7F, 'E', 'L', 'F') \ + __ENUMERATE_MIME_TYPE_HEADER(gif_87, "image/gif", 6, 'G', 'I', 'F', '8', '7', 'a') \ + __ENUMERATE_MIME_TYPE_HEADER(gif_89, "image/gif", 6, 'G', 'I', 'F', '8', '9', 'a') \ + __ENUMERATE_MIME_TYPE_HEADER(gzip, "extra/gzip", 2, 0x1F, 0x8B) \ + __ENUMERATE_MIME_TYPE_HEADER(jpeg, "image/jpeg", 4, 0xFF, 0xD8, 0xFF, 0xDB) \ + __ENUMERATE_MIME_TYPE_HEADER(jpeg_huh, "image/jpeg", 4, 0xFF, 0xD8, 0xFF, 0xEE) \ + __ENUMERATE_MIME_TYPE_HEADER(jpeg_jfif, "image/jpeg", 12, 0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 'J', 'F', 'I', 'F', 0x00, 0x01) \ + __ENUMERATE_MIME_TYPE_HEADER(pbm, "image/x-portable-bitmap", 3, 0x50, 0x31, 0x0A) \ + __ENUMERATE_MIME_TYPE_HEADER(pgm, "image/x-portable-graymap", 3, 0x50, 0x32, 0x0A) \ + __ENUMERATE_MIME_TYPE_HEADER(png, "image/png", 8, 0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A) \ + __ENUMERATE_MIME_TYPE_HEADER(ppm, "image/x-portable-pixmap", 3, 0x50, 0x33, 0x0A) \ + __ENUMERATE_MIME_TYPE_HEADER(shell, "text/x-shellscript", 10, '#', '!', '/', 'b', 'i', 'n', '/', 's', 'h', '\n') + +#define __ENUMERATE_MIME_TYPE_HEADER(var_name, mime_type, pattern_size, ...) \ + static const u8 var_name##_arr[pattern_size] = { __VA_ARGS__ }; \ + static constexpr ReadonlyBytes var_name = ReadonlyBytes { var_name##_arr, pattern_size }; +ENUMERATE_HEADER_CONTENTS +#undef __ENUMERATE_MIME_TYPE_HEADER + +Optional guess_mime_type_based_on_sniffed_bytes(const ReadonlyBytes& bytes) +{ +#define __ENUMERATE_MIME_TYPE_HEADER(var_name, mime_type, pattern_size, ...) \ + if (bytes.starts_with(var_name)) \ + return mime_type; + ENUMERATE_HEADER_CONTENTS; +#undef __ENUMERATE_MIME_TYPE_HEADER + return {}; +} } diff --git a/Userland/Libraries/LibCore/MimeData.h b/Userland/Libraries/LibCore/MimeData.h index 8f9cb0141d..a145cdd552 100644 --- a/Userland/Libraries/LibCore/MimeData.h +++ b/Userland/Libraries/LibCore/MimeData.h @@ -49,4 +49,6 @@ private: String guess_mime_type_based_on_filename(const StringView&); +Optional guess_mime_type_based_on_sniffed_bytes(const ReadonlyBytes&); + }