From 1eaaa8c3e913e85e8297db52ea7df47d7bc9a8a1 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 3 Mar 2024 21:54:00 -0500 Subject: [PATCH] LibPDF+LibGfx: Support JBIG2s with /JBIG2Globals set Several ramifications: * /JBIG2Globals is an indirect reference, which means we now need a Document for unfiltering. (Technically, other decode parameters can also be indirect objects and we should use the Document to resolve() those too, but in practice it only seems to be needed for /JBIG2Globals.) * Since /JBIG2Globals are so rare, we just parse once for each image that uses them, and decode_embedded() now receives a Vector with all sections of sequences of segments. * Internally, decode_segment_headers() is now called several times for embedded JBIG2s with multiple such sections (e.g. PDFs with /JBIG2Globals). * That means `data` is now no longer part of JBIG2LoadingContext and things get slightly reshuffled due to this. This completes the LibPDF part of JBIG2 support. Once LibGfx implements actual decoding of JBIG2s, things should start to Just Work in PDFs. 
--- .../LibGfx/ImageFormats/JBIG2Loader.cpp | 34 +++++++++---------- .../LibGfx/ImageFormats/JBIG2Loader.h | 4 +-- Userland/Libraries/LibPDF/Filter.cpp | 16 +++++---- Userland/Libraries/LibPDF/Filter.h | 4 +-- Userland/Libraries/LibPDF/Parser.cpp | 2 +- 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.cpp b/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.cpp index 301e25a680..17c988843d 100644 --- a/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.cpp +++ b/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.cpp @@ -73,7 +73,6 @@ struct JBIG2LoadingContext { Error, }; State state { State::NotDecoded }; - ReadonlyBytes data; Organization organization { Organization::Sequential }; IntSize size; @@ -83,12 +82,12 @@ struct JBIG2LoadingContext { Vector segments; }; -static ErrorOr decode_jbig2_header(JBIG2LoadingContext& context) +static ErrorOr decode_jbig2_header(JBIG2LoadingContext& context, ReadonlyBytes data) { - if (!JBIG2ImageDecoderPlugin::sniff(context.data)) + if (!JBIG2ImageDecoderPlugin::sniff(data)) return Error::from_string_literal("JBIG2LoadingContext: Invalid JBIG2 header"); - FixedMemoryStream stream(context.data.slice(sizeof(id_string))); + FixedMemoryStream stream(data.slice(sizeof(id_string))); // D.4.2 File header flags u8 header_flags = TRY(stream.read_value()); @@ -219,11 +218,8 @@ static ErrorOr scan_for_immediate_generic_region_size(ReadonlyBytes data return size; } -static ErrorOr decode_segment_headers(JBIG2LoadingContext& context) +static ErrorOr decode_segment_headers(JBIG2LoadingContext& context, ReadonlyBytes data) { - ReadonlyBytes data = context.data; - if (context.organization != Organization::Embedded) - data = data.slice(sizeof(id_string) + sizeof(u8) + (context.number_of_pages.has_value() ? 
sizeof(u32) : 0)); FixedMemoryStream stream(data); Vector segment_datas; @@ -270,10 +266,9 @@ static ErrorOr decode_segment_headers(JBIG2LoadingContext& context) return {}; } -JBIG2ImageDecoderPlugin::JBIG2ImageDecoderPlugin(ReadonlyBytes data) +JBIG2ImageDecoderPlugin::JBIG2ImageDecoderPlugin() { m_context = make(); - m_context->data = data; } IntSize JBIG2ImageDecoderPlugin::size() @@ -288,9 +283,12 @@ bool JBIG2ImageDecoderPlugin::sniff(ReadonlyBytes data) ErrorOr> JBIG2ImageDecoderPlugin::create(ReadonlyBytes data) { - auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin(data))); - TRY(decode_jbig2_header(*plugin->m_context)); - TRY(decode_segment_headers(*plugin->m_context)); + auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin())); + TRY(decode_jbig2_header(*plugin->m_context, data)); + + data = data.slice(sizeof(id_string) + sizeof(u8) + (plugin->m_context->number_of_pages.has_value() ? sizeof(u32) : 0)); + TRY(decode_segment_headers(*plugin->m_context, data)); + return plugin; } @@ -306,12 +304,14 @@ ErrorOr JBIG2ImageDecoderPlugin::frame(size_t index, Optio return Error::from_string_literal("JBIG2ImageDecoderPlugin: Draw the rest of the owl"); } -ErrorOr JBIG2ImageDecoderPlugin::decode_embedded(ReadonlyBytes data) +ErrorOr JBIG2ImageDecoderPlugin::decode_embedded(Vector data) { - dbgln_if(JBIG2_DEBUG, "JBIG2ImageDecoderPlugin: Decoding embedded JBIG2 of size {}", data.size()); - auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin(data))); + auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin())); plugin->m_context->organization = Organization::Embedded; - TRY(decode_segment_headers(*plugin->m_context)); + + for (auto const& segment_data : data) + TRY(decode_segment_headers(*plugin->m_context, segment_data)); + return Error::from_string_literal("JBIG2ImageDecoderPlugin: Cannot decode embedded JBIG2 yet"); } diff --git 
a/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.h b/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.h index 2203a27df6..1841ed8be6 100644 --- a/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.h +++ b/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.h @@ -25,10 +25,10 @@ public: virtual ErrorOr frame(size_t index, Optional ideal_size = {}) override; - static ErrorOr decode_embedded(ReadonlyBytes); + static ErrorOr decode_embedded(Vector); private: - JBIG2ImageDecoderPlugin(ReadonlyBytes); + JBIG2ImageDecoderPlugin(); OwnPtr m_context; }; diff --git a/Userland/Libraries/LibPDF/Filter.cpp b/Userland/Libraries/LibPDF/Filter.cpp index b12cd85f86..88e57ca845 100644 --- a/Userland/Libraries/LibPDF/Filter.cpp +++ b/Userland/Libraries/LibPDF/Filter.cpp @@ -19,7 +19,7 @@ namespace PDF { -PDFErrorOr Filter::decode(ReadonlyBytes bytes, DeprecatedFlyString const& encoding_type, RefPtr decode_parms) +PDFErrorOr Filter::decode(Document* document, ReadonlyBytes bytes, DeprecatedFlyString const& encoding_type, RefPtr decode_parms) { if (encoding_type == CommonNames::ASCIIHexDecode) return decode_ascii_hex(bytes); @@ -34,7 +34,7 @@ PDFErrorOr Filter::decode(ReadonlyBytes bytes, DeprecatedFlyString c if (encoding_type == CommonNames::CCITTFaxDecode) return decode_ccitt(bytes, decode_parms); if (encoding_type == CommonNames::JBIG2Decode) - return decode_jbig2(bytes, decode_parms); + return decode_jbig2(document, bytes, decode_parms); if (encoding_type == CommonNames::DCTDecode) return decode_dct(bytes); if (encoding_type == CommonNames::JPXDecode) @@ -334,15 +334,19 @@ PDFErrorOr Filter::decode_ccitt(ReadonlyBytes bytes, RefPtr Filter::decode_jbig2(ReadonlyBytes bytes, RefPtr decode_parms) +PDFErrorOr Filter::decode_jbig2(Document* document, ReadonlyBytes bytes, RefPtr decode_parms) { // 3.3.6 JBIG2Decode Filter + Vector segments; if (decode_parms) { - if (decode_parms->contains(CommonNames::JBIG2Globals)) - return Error::rendering_unsupported_error("JBIG2Globals is not 
yet supported"); + if (decode_parms->contains(CommonNames::JBIG2Globals)) { + auto globals = TRY(decode_parms->get_stream(document, CommonNames::JBIG2Globals)); + segments.append(globals->bytes()); + } } - return TRY(Gfx::JBIG2ImageDecoderPlugin::decode_embedded(bytes)); + segments.append(bytes); + return TRY(Gfx::JBIG2ImageDecoderPlugin::decode_embedded(segments)); } PDFErrorOr Filter::decode_dct(ReadonlyBytes bytes) diff --git a/Userland/Libraries/LibPDF/Filter.h b/Userland/Libraries/LibPDF/Filter.h index 17a453b694..a9f5607264 100644 --- a/Userland/Libraries/LibPDF/Filter.h +++ b/Userland/Libraries/LibPDF/Filter.h @@ -15,7 +15,7 @@ namespace PDF { class Filter { public: - static PDFErrorOr decode(ReadonlyBytes bytes, DeprecatedFlyString const& encoding_type, RefPtr decode_parms); + static PDFErrorOr decode(Document* document, ReadonlyBytes bytes, DeprecatedFlyString const& encoding_type, RefPtr decode_parms); private: static PDFErrorOr decode_ascii_hex(ReadonlyBytes bytes); @@ -26,7 +26,7 @@ private: static PDFErrorOr decode_flate(ReadonlyBytes bytes, RefPtr decode_parms); static PDFErrorOr decode_run_length(ReadonlyBytes bytes); static PDFErrorOr decode_ccitt(ReadonlyBytes bytes, RefPtr decode_parms); - static PDFErrorOr decode_jbig2(ReadonlyBytes bytes, RefPtr decode_parms); + static PDFErrorOr decode_jbig2(Document* document, ReadonlyBytes bytes, RefPtr decode_parms); static PDFErrorOr decode_dct(ReadonlyBytes bytes); static PDFErrorOr decode_jpx(ReadonlyBytes bytes); static PDFErrorOr decode_crypt(ReadonlyBytes bytes); diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index db1e85d312..dbc4c9f80e 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -460,7 +460,7 @@ PDFErrorOr Parser::unfilter_stream(NonnullRefPtr stream_obje if (!decode_parms_vector.is_empty()) decode_parms = decode_parms_vector.at(i); - stream_object->buffer() = TRY(Filter::decode(stream_object->bytes(), 
filters.at(i), decode_parms)); + stream_object->buffer() = TRY(Filter::decode(m_document, stream_object->bytes(), filters.at(i), decode_parms)); } return {};