diff --git a/Userland/Libraries/LibPDF/Document.cpp b/Userland/Libraries/LibPDF/Document.cpp index deeb3ca46c..a127a4359f 100644 --- a/Userland/Libraries/LibPDF/Document.cpp +++ b/Userland/Libraries/LibPDF/Document.cpp @@ -39,17 +39,10 @@ RefPtr Document::create(const ReadonlyBytes& bytes) auto parser = adopt_ref(*new Parser({}, bytes)); auto document = adopt_ref(*new Document(parser)); - VERIFY(parser->perform_validation()); - auto xref_table_and_trailer_opt = parser->parse_last_xref_table_and_trailer(); - if (!xref_table_and_trailer_opt.has_value()) + if (!parser->initialize()) return {}; - auto [xref_table, trailer] = xref_table_and_trailer_opt.value(); - - document->m_xref_table = xref_table; - document->m_trailer = trailer; - - document->m_catalog = document->m_trailer->get_dict(document, CommonNames::Root); + document->m_catalog = parser->trailer()->get_dict(document, CommonNames::Root); document->build_page_tree(); document->build_outline(); @@ -68,13 +61,9 @@ Value Document::get_or_load_value(u32 index) if (value) return value; - VERIFY(m_xref_table.has_object(index)); - auto byte_offset = m_xref_table.byte_offset_for_object(index); - auto indirect_value = m_parser->parse_indirect_value_at_offset(byte_offset); - VERIFY(indirect_value->index() == index); - value = indirect_value->value(); - m_values.set(index, value); - return value; + auto object = m_parser->parse_object_with_index(index); + m_values.set(index, object); + return object; } u32 Document::get_first_page_index() const @@ -179,9 +168,8 @@ bool Document::add_page_tree_node_to_page_tree(NonnullRefPtr page_tr for (auto& value : *kids_array) { auto reference_index = value.as_ref_index(); - auto byte_offset = m_xref_table.byte_offset_for_object(reference_index); bool ok; - auto maybe_page_tree_node = m_parser->conditionally_parse_page_tree_node_at_offset(byte_offset, ok); + auto maybe_page_tree_node = m_parser->conditionally_parse_page_tree_node(reference_index, ok); if (!ok) return false; if (maybe_page_tree_node) { diff --git a/Userland/Libraries/LibPDF/Document.h b/Userland/Libraries/LibPDF/Document.h index 655b0450eb..270c8c5b44 100644 --- a/Userland/Libraries/LibPDF/Document.h +++ b/Userland/Libraries/LibPDF/Document.h @@ -75,8 +75,6 @@ class Document final : public RefCounted { public: static RefPtr create(const ReadonlyBytes& bytes); - ALWAYS_INLINE const XRefTable& xref_table() const { return m_xref_table; } - ALWAYS_INLINE const DictObject& trailer() const { return *m_trailer; } ALWAYS_INLINE const RefPtr& outline() const { return m_outline; } [[nodiscard]] Value get_or_load_value(u32 index); @@ -92,12 +90,6 @@ public: return m_values.get(index).value_or({}); } - ALWAYS_INLINE void set_value(u32 index, const Value& value) - { - m_values.ensure_capacity(index); - m_values.set(index, value); - } - // Strips away the layer of indirection by turning indirect value // refs into the value they reference, and indirect values into // the value being wrapped. @@ -139,8 +131,6 @@ private: NonnullRefPtrVector build_outline_item_chain(const Value& first_ref, const Value& last_ref); NonnullRefPtr m_parser; - XRefTable m_xref_table; - RefPtr m_trailer; RefPtr m_catalog; Vector m_page_object_indices; HashMap m_pages; diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index f868629525..0f919618c2 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -38,43 +38,47 @@ Parser::Parser(const ReadonlyBytes& bytes) { } -bool Parser::perform_validation() +bool Parser::initialize() { - return !sloppy_is_linearized() && parse_header(); -} + if (!parse_header()) + return {}; -Optional Parser::parse_last_xref_table_and_trailer() -{ m_reader.move_to(m_reader.bytes().size() - 1); if (!navigate_to_before_eof_marker()) - return {}; + return false; if (!navigate_to_after_startxref()) - return {}; + return false; if (m_reader.done()) - return {}; + return false; m_reader.set_reading_forwards(); auto xref_offset_value = parse_number(); if (!xref_offset_value.is_int()) - return {}; + return false; auto xref_offset = xref_offset_value.as_int(); m_reader.move_to(xref_offset); auto xref_table = parse_xref_table(); if (!xref_table.has_value()) - return {}; + return false; auto trailer = parse_file_trailer(); if (!trailer) - return {}; + return false; - return XRefTableAndTrailer { xref_table.value(), trailer.release_nonnull() }; + m_xref_table = xref_table.value(); + m_trailer = trailer; + return true; } -RefPtr Parser::parse_indirect_value_at_offset(size_t offset) +Value Parser::parse_object_with_index(u32 index) { - m_reader.set_reading_forwards(); - m_reader.move_to(offset); - return parse_indirect_value(); + VERIFY(m_xref_table.has_object(index)); + auto byte_offset = m_xref_table.byte_offset_for_object(index); + m_reader.move_to(byte_offset); + auto indirect_value = parse_indirect_value(); + VERIFY(indirect_value); + VERIFY(indirect_value->index() == index); + return indirect_value->value(); } bool Parser::parse_header() @@ -647,11 +651,14 @@ RefPtr Parser::parse_dict() return make_object(map); } -RefPtr Parser::conditionally_parse_page_tree_node_at_offset(size_t offset, bool& ok) +RefPtr Parser::conditionally_parse_page_tree_node(u32 object_index, bool& ok) { ok = true; - m_reader.move_to(offset); + VERIFY(m_xref_table.has_object(object_index)); + auto byte_offset = m_xref_table.byte_offset_for_object(object_index); + + m_reader.move_to(byte_offset); parse_number(); parse_number(); if (!m_reader.matches("obj")) { diff --git a/Userland/Libraries/LibPDF/Parser.h b/Userland/Libraries/LibPDF/Parser.h index 6774acb93b..6227898a1d 100644 --- a/Userland/Libraries/LibPDF/Parser.h +++ b/Userland/Libraries/LibPDF/Parser.h @@ -22,19 +22,19 @@ public: Parser(Badge, const ReadonlyBytes&); - void set_document(RefPtr document) { m_document = document; } + [[nodiscard]] ALWAYS_INLINE const RefPtr& trailer() const { return m_trailer; } + void set_document(const RefPtr& document) { m_document = document; } - bool perform_validation(); + // Parses the header and initializes the xref table and trailer + bool initialize(); - struct XRefTableAndTrailer { - XRefTable xref_table; - NonnullRefPtr trailer; - }; - Optional parse_last_xref_table_and_trailer(); + Value parse_object_with_index(u32 index); - RefPtr parse_indirect_value_at_offset(size_t offset); - - RefPtr conditionally_parse_page_tree_node_at_offset(size_t offset, bool& ok); + // Specialized version of parse_dict which aborts early if the dict being parsed + // is not a page object. A null RefPtr return indicates that the dict at this index + // is not a page tree node, whereas ok == false indicates a malformed PDF file and + // should cause an abort of the current operation. + RefPtr conditionally_parse_page_tree_node(u32 object_index, bool& ok); private: explicit Parser(const ReadonlyBytes&); @@ -85,6 +85,8 @@ private: Reader m_reader; RefPtr m_document; + XRefTable m_xref_table; + RefPtr m_trailer; }; }