From e16345555b63237443a0e1692e304d0ec22fa830 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 3 Jan 2024 17:30:02 -0500 Subject: [PATCH] LibPDF: Port 59b50fa43f8c2 to xref and object streams 0000440.pdf contains an xref stream object (at offset 3643676) starting: ``` 294 0 obj << /Type /XRef /Index [0 295] /Size 295 ``` and an object stream object (at offset 3640121) starting: ``` 230 0 obj << /Type /ObjStm /N 73 /First 614 ``` In both cases, the `obj` and the `<<` are separated by non-newline whitespace. 633e1632d01a854 made parse_indirect_value() tolerate this, but it updated neither parse_xref_stream() (which parses xref streams) nor parse_compressed_object_with_index() (which parses object streams), despite all three changes being part of #14873. Make parse_xref_stream() and parse_compressed_object_with_index() call parse_indirect_value() to pick up the fix over there. It's a bit less code too. (0000440.pdf is the only PDF in my 1000 test PDFs that this helps, somewhat surprisingly.) 
--- Userland/Libraries/LibPDF/DocumentParser.cpp | 44 ++++++++------------ 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp index 92ae52db1e..a1e9944ef7 100644 --- a/Userland/Libraries/LibPDF/DocumentParser.cpp +++ b/Userland/Libraries/LibPDF/DocumentParser.cpp @@ -390,18 +390,23 @@ PDFErrorOr DocumentParser::validate_xref_table_and_fix_if_necessary() return {}; } +static PDFErrorOr> indirect_value_as_stream(NonnullRefPtr indirect_value) +{ + auto value = indirect_value->value(); + if (!value.has>()) + return Error { Error::Type::Parse, "Expected indirect value to be a stream" }; + auto value_object = value.get>(); + if (!value_object->is()) + return Error { Error::Type::Parse, "Expected indirect value to be a stream" }; + return value_object->cast(); +} + PDFErrorOr> DocumentParser::parse_xref_stream() { - auto first_number = TRY(parse_number()); - auto second_number = TRY(parse_number()); + auto xref_stream = TRY(parse_indirect_value()); + auto stream = TRY(indirect_value_as_stream(xref_stream)); - if (!m_reader.matches("obj")) - return error("Malformed xref object"); - m_reader.move_by(3); - if (m_reader.matches_eol()) - m_reader.consume_eol(); - - auto dict = TRY(parse_dict()); + auto dict = stream->dict(); auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name(); if (type != "XRef") return error("Malformed xref dictionary"); @@ -425,7 +430,6 @@ PDFErrorOr> DocumentParser::parse_xref_stream() } else { subsections.append({ 0, number_of_object_entries }); } - auto stream = TRY(parse_stream(dict)); auto table = adopt_ref(*new XRefTable()); auto field_to_long = [](ReadonlyBytes field) -> long { @@ -562,22 +566,13 @@ PDFErrorOr DocumentParser::parse_compressed_object_with_index(u32 index) m_reader.move_to(stream_offset); - auto first_number = TRY(parse_number()); - auto second_number = TRY(parse_number()); + auto obj_stream = 
TRY(parse_indirect_value()); + auto stream = TRY(indirect_value_as_stream(obj_stream)); - if (first_number.get() != object_stream_index) + if (obj_stream->index() != object_stream_index) return error("Mismatching object stream index"); - if (second_number.get() != 0) - return error("Non-zero object stream generation number"); - if (!m_reader.matches("obj")) - return error("Malformed object stream"); - m_reader.move_by(3); - if (m_reader.matches_eol()) - m_reader.consume_eol(); - - push_reference({ static_cast(first_number.get()), static_cast(second_number.get()) }); - auto dict = TRY(parse_dict()); + auto dict = stream->dict(); auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name(); if (type != "ObjStm") @@ -586,9 +581,6 @@ PDFErrorOr DocumentParser::parse_compressed_object_with_index(u32 index) auto object_count = dict->get_value("N").get_u32(); auto first_object_offset = dict->get_value("First").get_u32(); - auto stream = TRY(parse_stream(dict)); - pop_reference(); - Parser stream_parser(m_document, stream->bytes()); // The data was already decrypted when reading the outer compressed ObjStm.