mirror of
https://github.com/RGBCube/serenity
synced 2025-07-28 19:27:36 +00:00
LibPDF: Parse page structures
This commit introduces the ability to parse the document catalog dict, as well as the page tree and individual pages. Pages obviously aren't fully parsed, as we won't care about most of the fields until we start actually rendering PDFs. One of the primary benefits of the PDF format is laziness. PDFs are not meant to be parsed all at once, and the same is true for pages. When a Document is constructed, it builds a map of page number to object index, but it does not fetch and parse any of the pages. A page is only parsed when a caller requests that particular page, and it is cached from then on. This commit also adds an object_cast function which logs bad casts if DEBUG_PDF is set. Finally, utility functions were added to ArrayObject and DictObject to get all types of objects from the collections, avoiding the need to cast manually.
This commit is contained in:
parent
72f693e9ed
commit
8c745ad0d9
11 changed files with 320 additions and 6 deletions
|
@ -6,6 +6,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Format.h>
|
||||
#include <AK/HashMap.h>
|
||||
#include <AK/RefCounted.h>
|
||||
#include <LibPDF/Object.h>
|
||||
#include <LibPDF/Parser.h>
|
||||
|
@ -13,6 +15,19 @@
|
|||
|
||||
namespace PDF {
|
||||
|
||||
// A rectangle described by two opposite corners: its lower-left point and
// its upper-right point.
//
// Every coordinate defaults to zero so that a default-constructed Rectangle
// never carries indeterminate values. The type is still an aggregate, so
// `Rectangle { llx, lly, urx, ury }` keeps working as before.
struct Rectangle {
    // X coordinate of the lower-left corner.
    float lower_left_x { 0.0f };
    // Y coordinate of the lower-left corner.
    float lower_left_y { 0.0f };
    // X coordinate of the upper-right corner.
    float upper_right_x { 0.0f };
    // Y coordinate of the upper-right corner.
    float upper_right_y { 0.0f };
};
|
||||
|
||||
struct Page {
|
||||
NonnullRefPtr<DictObject> resources;
|
||||
Rectangle media_box;
|
||||
NonnullRefPtr<Object> contents;
|
||||
};
|
||||
|
||||
class Document final : public RefCounted<Document> {
|
||||
public:
|
||||
explicit Document(const ReadonlyBytes& bytes);
|
||||
|
@ -21,6 +36,14 @@ public:
|
|||
|
||||
ALWAYS_INLINE const DictObject& trailer() const { return *m_trailer; }
|
||||
|
||||
[[nodiscard]] Value get_or_load_value(u32 index);
|
||||
|
||||
[[nodiscard]] u32 get_first_page_index() const;
|
||||
|
||||
[[nodiscard]] u32 get_page_count() const;
|
||||
|
||||
[[nodiscard]] Page get_page(u32 index);
|
||||
|
||||
ALWAYS_INLINE Value get_value(u32 index) const
|
||||
{
|
||||
return m_values.get(index).value_or({});
|
||||
|
@ -32,11 +55,53 @@ public:
|
|||
m_values.set(index, value);
|
||||
}
|
||||
|
||||
// Strips away the layer of indirection by turning indirect value
|
||||
// refs into the value they reference, and indirect values into
|
||||
// the value being wrapped.
|
||||
Value resolve(const Value& value);
|
||||
|
||||
// Like resolve, but unwraps the Value into the given type. Accepts
|
||||
// any object type, and the three primitive Value types.
|
||||
template<IsValueType T>
|
||||
UnwrappedValueType<T> resolve_to(const Value& value);
|
||||
|
||||
private:
|
||||
void build_page_tree();
|
||||
|
||||
Parser m_parser;
|
||||
XRefTable m_xref_table;
|
||||
RefPtr<DictObject> m_trailer;
|
||||
RefPtr<DictObject> m_catalog;
|
||||
Vector<u32> m_page_object_indices;
|
||||
HashMap<u32, Page> m_pages;
|
||||
HashMap<u32, Value> m_values;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
namespace AK {
|
||||
|
||||
template<>
|
||||
struct Formatter<PDF::Rectangle> : Formatter<StringView> {
|
||||
void format(FormatBuilder& builder, const PDF::Rectangle& rectangle)
|
||||
{
|
||||
Formatter<StringView>::format(builder,
|
||||
String::formatted("Rectangle {{ ll=({}, {}), ur=({}, {}) }}",
|
||||
rectangle.lower_left_x,
|
||||
rectangle.lower_left_y,
|
||||
rectangle.upper_right_x,
|
||||
rectangle.upper_right_y));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Formatter<PDF::Page> : Formatter<StringView> {
|
||||
void format(FormatBuilder& builder, const PDF::Page& page)
|
||||
{
|
||||
constexpr auto fmt_string = "Page {{\n resources={}\n contents={}\n media_box={}\n}}";
|
||||
auto str = String::formatted(fmt_string, page.resources->to_string(1), page.contents->to_string(1), page.media_box);
|
||||
Formatter<StringView>::format(builder, str);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue