diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp index 47253a6b20..90181c28e3 100644 --- a/Userland/Libraries/LibPDF/DocumentParser.cpp +++ b/Userland/Libraries/LibPDF/DocumentParser.cpp @@ -124,7 +124,19 @@ PDFErrorOr DocumentParser::initialize_linea return LinearizationResult::NotLinearized; // At this point, we still don't know for sure if we are dealing with a valid object. + + // The linearization dict is read before decryption state is initialized. + // A linearization dict only contains numbers, so the decryption dictionary is not needed (only strings and streams get decrypted, and only streams get unfiltered). + // But we don't know if the first object is a linearization dictionary until after parsing it, so the object might be a stream. + // If that stream is encrypted and filtered, we'd try to unfilter it while it's still encrypted, handing encrypted data to the unfiltering algorithms. + // This makes them assert, since they can't make sense of the encrypted data. + // So read the first object without unfiltering. + // If it is a linearization dict, there's no stream data and this has no effect. + // If it is a stream, this isn't a linearized file and the object will be read on demand (and unfiltered) later, when the object is lazily read via an xref entry. 
+ set_filters_enabled(false); auto indirect_value_or_error = parse_indirect_value(); + set_filters_enabled(true); + if (indirect_value_or_error.is_error()) return LinearizationResult::NotLinearized; diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index 5897846de2..43f4cf5373 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -474,7 +474,7 @@ PDFErrorOr> Parser::parse_stream(NonnullRefPtrsecurity_handler() && m_enable_encryption) m_document->security_handler()->decrypt(stream_object, m_current_reference_stack.last()); - if (dict->contains(CommonNames::Filter)) { + if (dict->contains(CommonNames::Filter) && m_enable_filters) { Vector filters; // We may either get a single filter or an array of cascading filters diff --git a/Userland/Libraries/LibPDF/Parser.h b/Userland/Libraries/LibPDF/Parser.h index a65dbd4491..64edd70e33 100644 --- a/Userland/Libraries/LibPDF/Parser.h +++ b/Userland/Libraries/LibPDF/Parser.h @@ -57,6 +57,11 @@ public: PDFErrorOr> parse_stream(NonnullRefPtr dict); PDFErrorOr> parse_operators(); + void set_filters_enabled(bool enabled) + { + m_enable_filters = enabled; + } + protected: void push_reference(Reference const& ref) { m_current_reference_stack.append(ref); } void pop_reference() { m_current_reference_stack.take_last(); } @@ -73,6 +78,7 @@ protected: WeakPtr m_document; Vector m_current_reference_stack; bool m_enable_encryption { true }; + bool m_enable_filters { true }; /* Unfiltering is on by default, like m_enable_encryption; parse_stream() skips the /Filter step only while a caller has switched it off via set_filters_enabled(false). */ }; };