/* * Copyright (c) 2021, Matthew Olsson * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once #include #include #include #include #include #include namespace PDF { struct Rectangle { float lower_left_x; float lower_left_y; float upper_right_x; float upper_right_y; }; struct Page { NonnullRefPtr resources; NonnullRefPtr contents; Rectangle media_box; Rectangle crop_box; float user_unit; int rotate; }; class Document final : public RefCounted { public: explicit Document(const ReadonlyBytes& bytes); ALWAYS_INLINE const XRefTable& xref_table() const { return m_xref_table; } ALWAYS_INLINE const DictObject& trailer() const { return *m_trailer; } [[nodiscard]] Value get_or_load_value(u32 index); [[nodiscard]] u32 get_first_page_index() const; [[nodiscard]] u32 get_page_count() const; [[nodiscard]] Page get_page(u32 index); ALWAYS_INLINE Value get_value(u32 index) const { return m_values.get(index).value_or({}); } ALWAYS_INLINE void set_value(u32 index, const Value& value) { m_values.ensure_capacity(index); m_values.set(index, value); } // Strips away the layer of indirection by turning indirect value // refs into the value they reference, and indirect values into // the value being wrapped. Value resolve(const Value& value); // Like resolve, but unwraps the Value into the given type. Accepts // any object type, and the three primitive Value types. template UnwrappedValueType resolve_to(const Value& value) { auto resolved = resolve(value); if constexpr (IsSame) return resolved.as_bool(); if constexpr (IsSame) return resolved.as_int(); if constexpr (IsSame) return resolved.as_float(); if constexpr (IsObject) return object_cast(resolved.as_object()); VERIFY_NOT_REACHED(); } private: // FIXME: Currently, to improve performance, we don't load any pages at Document // construction, rather we just load the page structure and populate // m_page_object_indices. However, we can be even lazier and defer page tree node // parsing, as good PDF writers will layout the page tree in a balanced tree to // improve lookup time. This would reduce the initial overhead by not loading // every page tree node of, say, a 1000+ page PDF file. void build_page_tree(); void add_page_tree_node_to_page_tree(NonnullRefPtr page_tree); Parser m_parser; XRefTable m_xref_table; RefPtr m_trailer; RefPtr m_catalog; Vector m_page_object_indices; HashMap m_pages; HashMap m_values; }; } namespace AK { template<> struct Formatter : Formatter { void format(FormatBuilder& builder, const PDF::Rectangle& rectangle) { Formatter::format(builder, String::formatted("Rectangle {{ ll=({}, {}), ur=({}, {}) }}", rectangle.lower_left_x, rectangle.lower_left_y, rectangle.upper_right_x, rectangle.upper_right_y)); } }; template<> struct Formatter : Formatter { void format(FormatBuilder& builder, const PDF::Page& page) { constexpr auto fmt_string = "Page {{\n resources={}\n contents={}\n media_box={}\n crop_box={}\n user_unit={}\n rotate={}\n}}"; auto str = String::formatted(fmt_string, page.resources->to_string(1), page.contents->to_string(1), page.media_box, page.crop_box, page.user_unit, page.rotate); Formatter::format(builder, str); } }; }