diff --git a/Base/res/pdf/complex.pdf b/Base/res/pdf/complex.pdf new file mode 100644 index 0000000000..4d7a58dcb6 --- /dev/null +++ b/Base/res/pdf/complex.pdf @@ -0,0 +1,93 @@ +%PDF-1.1 +1 0 obj +<< /Kids [2 0 R 3 0 R] /Type /Pages /Count 3 >> +endobj +4 0 obj +<< >> +stream +1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page One) Tj ET +endstream +endobj +2 0 obj +<< + /Rotate 0 + /Parent 1 0 R + /Resources + << /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >> + /MediaBox [0.000000 0.000000 595.275590551 841.88976378] + /Type /Page + /Contents [4 0 R] +>> +endobj +5 0 obj +<< /PageLayout /TwoColumnLeft /Pages 1 0 R /Type /Catalog >> +endobj +6 0 obj +<< + /Rotate 0 + /Parent 3 0 R + /Resources + << /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >> + /MediaBox [0.000000 0.000000 595.275590551 841.88976378] + /Type /Page + /Contents [7 0 R] +>> +endobj +3 0 obj +<< /Parent 1 0 R /Kids [8 0 R 6 0 R] /Count 2 /Type /Pages >> +endobj +8 0 obj +<< + /Rotate 270 + /Parent 3 0 R + /Resources + << /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >> + /MediaBox [0.000000 0.000000 595.275590551 841.88976378] + /Type /Page + /Contents [9 0 R] +>> +endobj +9 0 obj +<< >> +stream +q 1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page Two) Tj ET Q +1. 0.000000 0.000000 1. 50. 750 cm BT /F0 16 Tf ((Rotated by 270 degrees)) Tj ET +endstream +endobj +7 0 obj +<< >> +stream +1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page Three) Tj ET +endstream +endobj +10 0 obj +<< + /Title (PDF Explained Example) + /Author (John Whitington) + /Producer (Manually Created) + /ModDate (D:20110313002346Z) + /CreationDate (D:2011) +>> +endobj xref +0 11 +0000000000 65536 f +0000000009 00000 n +0000000177 00000 n +0000000731 00000 n +0000000072 00000 n +0000000416 00000 n +0000000492 00000 n +0000001239 00000 n +0000000808 00000 n +0000001049 00000 n +0000001346 00000 n +trailer +<< + /Info 10 0 R + /Root 5 0 R + /Size 11 + /ID [<75ff22189ceac848dfa2afec93deee03> <75ff22189ceac848dfa2afec93deee03>] +>> +startxref +1516 +%%EOF diff --git a/Base/res/pdf/linearized.pdf b/Base/res/pdf/linearized.pdf new file mode 100644 index 0000000000..34f8324f24 Binary files /dev/null and b/Base/res/pdf/linearized.pdf differ diff --git a/Base/res/pdf/non-linearized.pdf b/Base/res/pdf/non-linearized.pdf new file mode 100644 index 0000000000..774c2ea70c Binary files /dev/null and b/Base/res/pdf/non-linearized.pdf differ diff --git a/Userland/Libraries/CMakeLists.txt b/Userland/Libraries/CMakeLists.txt index ae9734ef40..548498ba85 100644 --- a/Userland/Libraries/CMakeLists.txt +++ b/Userland/Libraries/CMakeLists.txt @@ -26,6 +26,7 @@ add_subdirectory(LibLine) add_subdirectory(LibM) add_subdirectory(LibMarkdown) add_subdirectory(LibPCIDB) +add_subdirectory(LibPDF) add_subdirectory(LibProtocol) add_subdirectory(LibPthread) add_subdirectory(LibRegex) diff --git a/Userland/Libraries/LibPDF/CMakeLists.txt b/Userland/Libraries/LibPDF/CMakeLists.txt new file mode 100644 index 0000000000..15e1562f76 --- /dev/null +++ b/Userland/Libraries/LibPDF/CMakeLists.txt @@ -0,0 +1,7 @@ +set(SOURCES + Object.cpp + Value.cpp + ) + +serenity_lib(LibPDF pdf) +target_link_libraries(LibPDF LibC LibCore) diff --git a/Userland/Libraries/LibPDF/Forward.h b/Userland/Libraries/LibPDF/Forward.h new file mode 100644 index 0000000000..dd9825cff8 --- /dev/null +++ b/Userland/Libraries/LibPDF/Forward.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +namespace PDF { + +class Document; +class Object; + +#define ENUMERATE_DIRECT_OBJECT_TYPES(V) \ + V(StringObject, string) \ + V(NameObject, name) \ + V(ArrayObject, array) \ + V(DictObject, dict) \ + V(StreamObject, stream) \ + V(IndirectValue, indirect_value) + +#define ENUMERATE_OBJECT_TYPES(V) \ + ENUMERATE_DIRECT_OBJECT_TYPES(V) \ + V(IndirectValueRef, indirect_value_ref) + +#define FORWARD_DECL(class_name, _) class class_name; +ENUMERATE_OBJECT_TYPES(FORWARD_DECL) +#undef FORWARD_DECL + +template +concept IsObject = IsBaseOf; + +} diff --git a/Userland/Libraries/LibPDF/Object.cpp b/Userland/Libraries/LibPDF/Object.cpp new file mode 100644 index 0000000000..4fca01a497 --- /dev/null +++ b/Userland/Libraries/LibPDF/Object.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2021, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace PDF { + +static void append_indent(StringBuilder& builder, int indent) +{ + for (int i = 0; i < indent; i++) + builder.append(" "); +} + +String StringObject::to_string(int) const +{ + if (is_binary()) + return String::formatted("<{}>", encode_hex(string().bytes()).to_uppercase()); + return String::formatted("({})", string()); +} + +String NameObject::to_string(int) const +{ + StringBuilder builder; + builder.appendff("/{}", this->name()); + return builder.to_string(); +} + +String ArrayObject::to_string(int indent) const +{ + StringBuilder builder; + builder.append("[\n"); + bool first = true; + + for (auto& element : elements()) { + if (!first) + builder.append(",\n"); + first = false; + append_indent(builder, indent + 1); + builder.appendff("{}", element.to_string(indent)); + } + + builder.append('\n'); + append_indent(builder, indent); + builder.append(']'); + return builder.to_string(); +} + +String DictObject::to_string(int indent) const +{ + StringBuilder builder; + builder.append("<<\n"); + bool first = true; + + for (auto& [key, value] : map()) { + if (!first) + builder.append(",\n"); + first = false; + append_indent(builder, indent + 1); + builder.appendff("/{} ", key); + builder.appendff("{}", value.to_string(indent + 1)); + } + + builder.append('\n'); + append_indent(builder, indent); + builder.append(">>"); + return builder.to_string(); +} + +String StreamObject::to_string(int indent) const +{ + StringBuilder builder; + builder.append("stream\n"); + append_indent(builder, indent); + builder.appendff("{}\n", dict()->to_string(indent + 1)); + append_indent(builder, indent + 1); + + auto string = encode_hex(bytes()); + while (true) { + if (string.length() > 60) { + builder.appendff("{}\n", string.substring(0, 60)); + append_indent(builder, indent); + string = string.substring(60); + continue; + } + + builder.appendff("{}\n", string); + break; + } + + append_indent(builder, indent); + builder.append("endstream"); + return builder.to_string(); +} + +String IndirectValue::to_string(int indent) const +{ + StringBuilder builder; + builder.appendff("{} {} obj\n", index(), generation_index()); + append_indent(builder, indent + 1); + builder.append(value().to_string(indent + 1)); + builder.append('\n'); + append_indent(builder, indent); + builder.append("endobj"); + return builder.to_string(); +} + +String IndirectValueRef::to_string(int) const +{ + return String::formatted("{} {} R", index(), generation_index()); +} + +} diff --git a/Userland/Libraries/LibPDF/Object.h b/Userland/Libraries/LibPDF/Object.h new file mode 100644 index 0000000000..088204e397 --- /dev/null +++ b/Userland/Libraries/LibPDF/Object.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2021, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace PDF { + +class Object : public RefCounted { +public: + virtual ~Object() = default; + + [[nodiscard]] ALWAYS_INLINE u32 generation_index() const { return m_generation_index; } + ALWAYS_INLINE void set_generation_index(u32 generation_index) { m_generation_index = generation_index; } + +#define DEFINE_ID(_, name) \ + virtual bool is_##name() const { return false; } + ENUMERATE_OBJECT_TYPES(DEFINE_ID) +#undef DEFINE_ID + + virtual String to_string(int indent) const = 0; + +private: + u32 m_generation_index { 0 }; +}; + +class StringObject final : public Object { +public: + StringObject(String string, bool is_binary) + : m_string(move(string)) + , m_is_binary(is_binary) + { + } + + ~StringObject() override = default; + + [[nodiscard]] ALWAYS_INLINE const String& string() const { return m_string; } + [[nodiscard]] ALWAYS_INLINE bool is_binary() const { return m_is_binary; } + + ALWAYS_INLINE bool is_string() const override { return true; } + String to_string(int indent) const override; + +private: + String m_string; + bool m_is_binary; +}; + +class NameObject final : public Object { +public: + explicit NameObject(FlyString name) + : m_name(move(name)) + { + } + + ~NameObject() override = default; + + [[nodiscard]] ALWAYS_INLINE FlyString name() const { return m_name; } + + ALWAYS_INLINE bool is_name() const override { return true; } + String to_string(int indent) const override; + +private: + FlyString m_name; +}; + +class ArrayObject final : public Object { +public: + explicit ArrayObject(Vector elements) + : m_elements(move(elements)) + { + } + + ~ArrayObject() override = default; + + [[nodiscard]] ALWAYS_INLINE Vector elements() const { return m_elements; } + + ALWAYS_INLINE bool is_array() const override { return true; } + String to_string(int indent) const override; + +private: + Vector m_elements; +}; + +class DictObject final : public Object { +public: + explicit DictObject(HashMap map) + : m_map(move(map)) + { + } + + ~DictObject() override = default; + + [[nodiscard]] ALWAYS_INLINE HashMap map() const { return m_map; } + + ALWAYS_INLINE bool is_dict() const override { return true; } + String to_string(int indent) const override; + +private: + HashMap m_map; +}; + +class StreamObject final : public Object { +public: + StreamObject(const NonnullRefPtr& dict, const ReadonlyBytes& bytes) + : m_dict(dict) + , m_bytes(bytes) + { + } + + ~StreamObject() override = default; + + [[nodiscard]] ALWAYS_INLINE NonnullRefPtr dict() const { return m_dict; } + [[nodiscard]] ALWAYS_INLINE const ReadonlyBytes& bytes() const { return m_bytes; } + + ALWAYS_INLINE bool is_stream() const override { return true; } + String to_string(int indent) const override; + +private: + NonnullRefPtr m_dict; + ReadonlyBytes m_bytes; +}; + +class IndirectValue final : public Object { +public: + IndirectValue(u32 index, u32 generation_index, const Value& value) + : m_index(index) + , m_value(value) + { + set_generation_index(generation_index); + } + + ~IndirectValue() override = default; + + [[nodiscard]] ALWAYS_INLINE u32 index() const { return m_index; } + [[nodiscard]] ALWAYS_INLINE const Value& value() const { return m_value; } + + ALWAYS_INLINE bool is_indirect_value() const override { return true; } + String to_string(int indent) const override; + +private: + u32 m_index; + Value m_value; +}; + +class IndirectValueRef final : public Object { +public: + IndirectValueRef(u32 index, u32 generation_index) + : m_index(index) + { + set_generation_index(generation_index); + } + + ~IndirectValueRef() override = default; + + [[nodiscard]] ALWAYS_INLINE u32 index() const { return m_index; } + + ALWAYS_INLINE bool is_indirect_value_ref() const override { return true; } + String to_string(int indent) const override; + +private: + u32 m_index; +}; + +} + +namespace AK { + +template +struct Formatter : Formatter { + void format(FormatBuilder& builder, const T& object) + { + Formatter::format(builder, object.to_string(0)); + } +}; + +template +struct Formatter> : Formatter { + void format(FormatBuilder& builder, const NonnullRefPtr& object) + { + Formatter::format(builder, *object); + } +}; + +} diff --git a/Userland/Libraries/LibPDF/Value.cpp b/Userland/Libraries/LibPDF/Value.cpp new file mode 100644 index 0000000000..8d79bb9743 --- /dev/null +++ b/Userland/Libraries/LibPDF/Value.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2021, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace PDF { + +Value::~Value() +{ + if (is_object()) + m_as_object->unref(); +} + +Value& Value::operator=(const Value& other) +{ + m_type = other.m_type; + switch (m_type) { + case Type::Null: + break; + case Type::Bool: + m_as_bool = other.m_as_bool; + break; + case Type::Int: + m_as_int = other.m_as_int; + break; + case Type::Float: + m_as_float = other.m_as_float; + break; + case Type::Object: + m_as_object = other.m_as_object; + if (m_as_object) + m_as_object->ref(); + break; + } + return *this; +} + +String Value::to_string(int indent) const +{ + switch (m_type) { + case Type::Null: + return "null"; + case Type::Bool: + return as_bool() ? "true" : "false"; + case Type::Int: + return String::number(as_int()); + case Type::Float: + return String::number(as_float()); + case Type::Object: + return as_object()->to_string(indent); + } + + VERIFY_NOT_REACHED(); +} + +} diff --git a/Userland/Libraries/LibPDF/Value.h b/Userland/Libraries/LibPDF/Value.h new file mode 100644 index 0000000000..6999902bfc --- /dev/null +++ b/Userland/Libraries/LibPDF/Value.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2021, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include + +namespace PDF { + +class Object; + +class Value { +public: + Value() + : m_type(Type::Null) + { + } + + Value(bool b) + : m_type(Type::Bool) + { + m_as_bool = b; + } + + Value(int i) + : m_type(Type::Int) + { + m_as_int = i; + } + + Value(float f) + : m_type(Type::Float) + { + m_as_float = f; + } + + template + Value(NonnullRefPtr obj) + : m_type(Type::Object) + { + obj->ref(); + m_as_object = obj; + } + + Value(const Value& other) + { + *this = other; + } + + ~Value(); + + Value& operator=(const Value& other); + + [[nodiscard]] ALWAYS_INLINE bool is_null() const { return m_type == Type::Null; } + [[nodiscard]] ALWAYS_INLINE bool is_bool() const { return m_type == Type::Bool; } + [[nodiscard]] ALWAYS_INLINE bool is_int() const { return m_type == Type::Int; } + [[nodiscard]] ALWAYS_INLINE bool is_float() const { return m_type == Type::Float; } + [[nodiscard]] ALWAYS_INLINE bool is_number() const { return is_int() || is_float(); } + + [[nodiscard]] ALWAYS_INLINE bool is_object() const { return m_type == Type::Object; } + + [[nodiscard]] ALWAYS_INLINE bool as_bool() const + { + VERIFY(is_bool()); + return m_as_bool; + } + + [[nodiscard]] ALWAYS_INLINE int as_int() const + { + VERIFY(is_int()); + return m_as_int; + } + + [[nodiscard]] ALWAYS_INLINE int to_int() const + { + if (is_int()) + return as_int(); + return static_cast(as_float()); + } + + [[nodiscard]] ALWAYS_INLINE float as_float() const + { + VERIFY(is_float()); + return m_as_float; + } + + [[nodiscard]] ALWAYS_INLINE float to_float() const + { + if (is_float()) + return as_float(); + return static_cast(as_int()); + } + + [[nodiscard]] ALWAYS_INLINE NonnullRefPtr as_object() const { return *m_as_object; } + + [[nodiscard]] ALWAYS_INLINE explicit operator bool() const { return !is_null(); } + + [[nodiscard]] String to_string(int indent = 0) const; + +private: + enum class Type { + Null, + Bool, + Int, + Float, + Object, + }; + + union { + bool m_as_bool; + int m_as_int; + float m_as_float; + Object* m_as_object; + }; + + Type m_type; +}; + +} + +namespace AK { + +template<> +struct Formatter : Formatter { + void format(FormatBuilder& builder, const PDF::Value& value) + { + Formatter::format(builder, value.to_string()); + } +}; + +}