diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp
index 0dee963839..95e8c3b3a9 100644
--- a/Userland/Libraries/LibPDF/DocumentParser.cpp
+++ b/Userland/Libraries/LibPDF/DocumentParser.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021-2022, Matthew Olsson
+ * Copyright (c) 2022, Julian Offenhäuser
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -51,6 +52,11 @@ PDFErrorOr DocumentParser::initialize()
 PDFErrorOr DocumentParser::parse_object_with_index(u32 index)
 {
     VERIFY(m_xref_table->has_object(index));
+
+    if (m_xref_table->is_object_compressed(index))
+        // The object can be found in an object stream
+        return parse_compressed_object_with_index(index);
+
     auto byte_offset = m_xref_table->byte_offset_for_object(index);
     m_reader.move_to(byte_offset);
     auto indirect_value = TRY(parse_indirect_value());
@@ -440,6 +446,61 @@ PDFErrorOr> DocumentParser::parse_file_trailer()
     return dict;
 }
 
+// Parses the object with number `index` out of the object stream that the
+// xref table lists for it. Fails if the stream's "N G obj" header or /Type is
+// not what an object stream requires, or if the stream's index table has no
+// entry for `index`.
+PDFErrorOr DocumentParser::parse_compressed_object_with_index(u32 index)
+{
+    auto object_stream_index = m_xref_table->object_stream_for_object(index);
+    auto stream_offset = m_xref_table->byte_offset_for_object(object_stream_index);
+
+    m_reader.move_to(stream_offset);
+
+    // The stream is itself an indirect object: "<number> <generation> obj".
+    auto first_number = TRY(parse_number());
+    auto second_number = TRY(parse_number());
+
+    if (first_number.get() != object_stream_index)
+        return error("Mismatching object stream index");
+    if (second_number.get() != 0)
+        return error("Non-zero object stream generation number");
+
+    if (!m_reader.matches("obj"))
+        return error("Malformed object stream");
+    m_reader.move_by(3);
+    if (m_reader.matches_eol())
+        m_reader.consume_eol();
+
+    auto dict = TRY(parse_dict());
+    auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name();
+    if (type != "ObjStm")
+        return error("Invalid object stream type");
+
+    auto object_count = dict->get_value("N").get_u32();
+    auto first_object_offset = dict->get_value("First").get_u32();
+
+    auto stream = TRY(parse_stream(dict));
+    Parser stream_parser(m_document, stream->bytes());
+
+    // The stream data starts with N pairs of numbers: an object number and
+    // that object's offset, which is added to /First to locate the object.
+    for (u32 i = 0; i < object_count; ++i) {
+        auto object_number = TRY(stream_parser.parse_number());
+        auto object_offset = TRY(stream_parser.parse_number());
+
+        if (object_number.get_u32() != index)
+            continue;
+
+        stream_parser.move_to(first_object_offset + object_offset.get_u32());
+        return TRY(stream_parser.parse_value());
+    }
+
+    // Without this, a stream that does not list `index` would be parsed from
+    // wherever the index table ended, yielding an unrelated value.
+    return error("Object not found in object stream");
+}
+
 PDFErrorOr DocumentParser::parse_page_offset_hint_table(ReadonlyBytes hint_stream_bytes)
 {
     if (hint_stream_bytes.size() < sizeof(PageOffsetHintTable))
diff --git a/Userland/Libraries/LibPDF/DocumentParser.h b/Userland/Libraries/LibPDF/DocumentParser.h
index 6b6814bb75..9f544f4b22 100644
--- a/Userland/Libraries/LibPDF/DocumentParser.h
+++ b/Userland/Libraries/LibPDF/DocumentParser.h
@@ -85,6 +85,7 @@ private:
     PDFErrorOr> parse_xref_stream();
     PDFErrorOr> parse_xref_table();
     PDFErrorOr> parse_file_trailer();
+    PDFErrorOr parse_compressed_object_with_index(u32 index);
 
     bool navigate_to_before_eof_marker();
     bool navigate_to_after_startxref();
diff --git a/Userland/Libraries/LibPDF/Parser.h b/Userland/Libraries/LibPDF/Parser.h
index 26eb6b9079..c9e7c317a7 100644
--- a/Userland/Libraries/LibPDF/Parser.h
+++ b/Userland/Libraries/LibPDF/Parser.h
@@ -35,6 +35,9 @@ public:
 
     String parse_comment();
 
+    void move_by(size_t count) { m_reader.move_by(count); }
+    void move_to(size_t offset) { m_reader.move_to(offset); }
+
     PDFErrorOr parse_value();
     PDFErrorOr parse_possible_indirect_value_or_ref();
     PDFErrorOr> parse_indirect_value(u32 index, u32 generation);