1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 21:07:34 +00:00

LibPDF: Create basic object structure

This commit is the start of LibPDF, and introduces some basic structure
objects. This emulates LibJS's Value structure, where Value is a simple
class that can contain a pointer to a more complex Object class with
more data. All of the basic PDF objects have a representation.
This commit is contained in:
Matthew Olsson 2021-04-30 18:23:17 -07:00 committed by Andreas Kling
parent af9a7b1374
commit a8f5b6aaa3
10 changed files with 637 additions and 0 deletions

View file

@ -0,0 +1,7 @@
# Source files that make up LibPDF.
set(SOURCES
Object.cpp
Value.cpp
)
# Declare the LibPDF target through the project's serenity_lib() helper.
# NOTE(review): the second argument is presumably the short library name — confirm against the helper.
serenity_lib(LibPDF pdf)
# Libraries LibPDF is linked against.
target_link_libraries(LibPDF LibC LibCore)

View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
// Forward declarations and type lists shared by the LibPDF headers.
namespace PDF {
class Document;
class Object;
// X-macro listing the direct object types as V(ClassName, snake_case_name).
#define ENUMERATE_DIRECT_OBJECT_TYPES(V) \
V(StringObject, string) \
V(NameObject, name) \
V(ArrayObject, array) \
V(DictObject, dict) \
V(StreamObject, stream) \
V(IndirectValue, indirect_value)
// X-macro listing every object type: the direct types plus IndirectValueRef.
#define ENUMERATE_OBJECT_TYPES(V) \
ENUMERATE_DIRECT_OBJECT_TYPES(V) \
V(IndirectValueRef, indirect_value_ref)
// Emit `class ClassName;` for every entry of ENUMERATE_OBJECT_TYPES.
#define FORWARD_DECL(class_name, _) class class_name;
ENUMERATE_OBJECT_TYPES(FORWARD_DECL)
#undef FORWARD_DECL
// Satisfied by any type T for which IsBaseOf<Object, T> holds.
template<typename T>
concept IsObject = IsBaseOf<Object, T>;
}

View file

@ -0,0 +1,116 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Hex.h>
#include <LibPDF/Object.h>
namespace PDF {
// Appends one " " to `builder` per level of `indent`; a zero or negative
// `indent` appends nothing.
static void append_indent(StringBuilder& builder, int indent)
{
    int remaining = indent;
    while (remaining > 0) {
        builder.append(" ");
        --remaining;
    }
}
// Renders the string: binary strings as upper-case hex wrapped in <...>,
// all other strings verbatim wrapped in (...). The indent argument is unused.
String StringObject::to_string(int) const
{
    if (!is_binary())
        return String::formatted("({})", string());

    auto hex = encode_hex(string().bytes()).to_uppercase();
    return String::formatted("<{}>", hex);
}
// Renders the name prefixed with a '/'. The indent argument is unused.
String NameObject::to_string(int) const
{
    return String::formatted("/{}", name());
}
// Renders the array as "[", one element per line separated by ",", then "]".
// `indent` is the nesting depth of the brackets; elements are written one
// level deeper.
String ArrayObject::to_string(int indent) const
{
    StringBuilder builder;
    builder.append("[\n");
    bool first = true;
    for (auto& element : elements()) {
        if (!first)
            builder.append(",\n");
        first = false;
        append_indent(builder, indent + 1);
        // The element starts at depth indent + 1, so a nested container must be
        // rendered at that depth as well; otherwise its closing delimiter lands
        // one level too shallow. DictObject::to_string() does the same for its values.
        builder.appendff("{}", element.to_string(indent + 1));
    }
    builder.append('\n');
    append_indent(builder, indent);
    builder.append(']');
    return builder.to_string();
}
// Renders the dictionary as "<<", one "/key value" entry per line separated
// by ",", then ">>". Entries (and any nested values) are written one level
// deeper than `indent`.
String DictObject::to_string(int indent) const
{
    const int entry_indent = indent + 1;

    StringBuilder out;
    out.append("<<\n");

    bool need_separator = false;
    for (auto& [key, value] : map()) {
        if (need_separator)
            out.append(",\n");
        need_separator = true;

        append_indent(out, entry_indent);
        out.appendff("/{} {}", key, value.to_string(entry_indent));
    }

    out.append('\n');
    append_indent(out, indent);
    out.append(">>");
    return out.to_string();
}
// Renders the stream as:
//   stream
//     <dictionary>
//     <hex of the stream bytes, wrapped at 60 characters per line>
//   endstream
// `indent` is the nesting depth of the "endstream" keyword; the dictionary and
// the hex dump are written one level deeper.
String StreamObject::to_string(int indent) const
{
    constexpr size_t hex_chars_per_line = 60;
    const int body_indent = indent + 1;

    StringBuilder builder;
    builder.append("stream\n");

    // The dictionary is rendered at body_indent, so its opening "<<" has to start
    // at that same depth for the opening and closing delimiters to line up.
    append_indent(builder, body_indent);
    builder.appendff("{}\n", dict()->to_string(body_indent));

    append_indent(builder, body_indent);
    auto hex = encode_hex(bytes());
    while (hex.length() > hex_chars_per_line) {
        builder.appendff("{}\n", hex.substring(0, hex_chars_per_line));
        // Continuation lines use the same depth as the first hex line.
        append_indent(builder, body_indent);
        hex = hex.substring(hex_chars_per_line);
    }
    builder.appendff("{}\n", hex);

    append_indent(builder, indent);
    builder.append("endstream");
    return builder.to_string();
}
// Renders "<index> <generation> obj", the wrapped value on its own line one
// level deeper than `indent`, then "endobj" at `indent`.
String IndirectValue::to_string(int indent) const
{
    const int body_indent = indent + 1;

    StringBuilder out;
    out.appendff("{} {} obj\n", index(), generation_index());
    append_indent(out, body_indent);
    out.appendff("{}\n", value().to_string(body_indent));
    append_indent(out, indent);
    out.append("endobj");
    return out.to_string();
}
// Renders the reference as "<index> <generation> R". The indent argument is unused.
String IndirectValueRef::to_string(int) const
{
    StringBuilder out;
    out.appendff("{} {} R", index(), generation_index());
    return out.to_string();
}
}

View file

@ -0,0 +1,193 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/FlyString.h>
#include <AK/Format.h>
#include <AK/HashMap.h>
#include <AK/RefCounted.h>
#include <LibPDF/Forward.h>
#include <LibPDF/Value.h>
namespace PDF {
// Abstract base class of all PDF objects; reference-counted via RefCounted<Object>.
class Object : public RefCounted<Object> {
public:
virtual ~Object() = default;
// Generation index stored on this object; 0 until set_generation_index() is called.
[[nodiscard]] ALWAYS_INLINE u32 generation_index() const { return m_generation_index; }
ALWAYS_INLINE void set_generation_index(u32 generation_index) { m_generation_index = generation_index; }
// Declares one virtual is_<name>() predicate per entry of ENUMERATE_OBJECT_TYPES.
// Each returns false here; the subclasses in this header override their own predicate.
#define DEFINE_ID(_, name) \
virtual bool is_##name() const { return false; }
ENUMERATE_OBJECT_TYPES(DEFINE_ID)
#undef DEFINE_ID
// Returns a textual rendering of the object; `indent` is the nesting depth used for indentation.
virtual String to_string(int indent) const = 0;
private:
u32 m_generation_index { 0 };
};
// PDF string object: a String plus a flag saying whether it is binary.
class StringObject final : public Object {
public:
// Takes ownership of `string` by moving it into the object.
StringObject(String string, bool is_binary)
: m_string(move(string))
, m_is_binary(is_binary)
{
}
~StringObject() override = default;
[[nodiscard]] ALWAYS_INLINE const String& string() const { return m_string; }
// When true, to_string() renders the string as hex inside <...> instead of verbatim inside (...).
[[nodiscard]] ALWAYS_INLINE bool is_binary() const { return m_is_binary; }
ALWAYS_INLINE bool is_string() const override { return true; }
String to_string(int indent) const override;
private:
String m_string;
bool m_is_binary;
};
// PDF name object: wraps a FlyString holding the name.
class NameObject final : public Object {
public:
explicit NameObject(FlyString name)
: m_name(move(name))
{
}
~NameObject() override = default;
// Returns the name by value (a copy of the stored FlyString).
[[nodiscard]] ALWAYS_INLINE FlyString name() const { return m_name; }
ALWAYS_INLINE bool is_name() const override { return true; }
// Renders the name prefixed with '/'.
String to_string(int indent) const override;
private:
FlyString m_name;
};
// PDF array object: an ordered list of Values.
class ArrayObject final : public Object {
public:
explicit ArrayObject(Vector<Value> elements)
: m_elements(move(elements))
{
}
~ArrayObject() override = default;
// Returns the elements by value, i.e. every call copies the whole vector.
[[nodiscard]] ALWAYS_INLINE Vector<Value> elements() const { return m_elements; }
ALWAYS_INLINE bool is_array() const override { return true; }
String to_string(int indent) const override;
private:
Vector<Value> m_elements;
};
// PDF dictionary object: maps FlyString keys to Values.
class DictObject final : public Object {
public:
explicit DictObject(HashMap<FlyString, Value> map)
: m_map(move(map))
{
}
~DictObject() override = default;
// Returns the map by value, i.e. every call copies the whole map.
[[nodiscard]] ALWAYS_INLINE HashMap<FlyString, Value> map() const { return m_map; }
ALWAYS_INLINE bool is_dict() const override { return true; }
String to_string(int indent) const override;
private:
HashMap<FlyString, Value> m_map;
};
// PDF stream object: a dictionary plus the stream's bytes.
class StreamObject final : public Object {
public:
// Stores a reference to `dict` and a copy of the `bytes` span object.
// NOTE(review): ReadonlyBytes is presumably a non-owning view, in which case the
// underlying buffer must outlive this object — confirm against callers.
StreamObject(const NonnullRefPtr<DictObject>& dict, const ReadonlyBytes& bytes)
: m_dict(dict)
, m_bytes(bytes)
{
}
~StreamObject() override = default;
[[nodiscard]] ALWAYS_INLINE NonnullRefPtr<DictObject> dict() const { return m_dict; }
[[nodiscard]] ALWAYS_INLINE const ReadonlyBytes& bytes() const { return m_bytes; }
ALWAYS_INLINE bool is_stream() const override { return true; }
// Renders the dictionary followed by a hex dump of bytes(), between "stream" and "endstream".
String to_string(int indent) const override;
private:
NonnullRefPtr<DictObject> m_dict;
ReadonlyBytes m_bytes;
};
// An indirect object definition: a Value labelled with an index and a generation index.
class IndirectValue final : public Object {
public:
// The generation index is stored in the Object base via set_generation_index().
IndirectValue(u32 index, u32 generation_index, const Value& value)
: m_index(index)
, m_value(value)
{
set_generation_index(generation_index);
}
~IndirectValue() override = default;
[[nodiscard]] ALWAYS_INLINE u32 index() const { return m_index; }
[[nodiscard]] ALWAYS_INLINE const Value& value() const { return m_value; }
ALWAYS_INLINE bool is_indirect_value() const override { return true; }
// Renders "<index> <generation> obj", the wrapped value, then "endobj".
String to_string(int indent) const override;
private:
u32 m_index;
Value m_value;
};
// A reference to an indirect object, identified by index and generation index.
class IndirectValueRef final : public Object {
public:
// The generation index is stored in the Object base via set_generation_index().
IndirectValueRef(u32 index, u32 generation_index)
: m_index(index)
{
set_generation_index(generation_index);
}
~IndirectValueRef() override = default;
[[nodiscard]] ALWAYS_INLINE u32 index() const { return m_index; }
ALWAYS_INLINE bool is_indirect_value_ref() const override { return true; }
// Renders "<index> <generation> R".
String to_string(int indent) const override;
private:
u32 m_index;
};
}
namespace AK {

// Formats any PDF::Object subclass by rendering it with to_string(0) and
// handing the text to the StringView formatter.
template<PDF::IsObject T>
struct Formatter<T> : Formatter<StringView> {
    void format(FormatBuilder& builder, const T& object)
    {
        const auto rendered = object.to_string(0);
        Formatter<StringView>::format(builder, rendered);
    }
};

// Formats a NonnullRefPtr to a PDF::Object subclass by formatting the pointee.
template<PDF::IsObject T>
struct Formatter<NonnullRefPtr<T>> : Formatter<T> {
    void format(FormatBuilder& builder, const NonnullRefPtr<T>& object)
    {
        const T& pointee = *object;
        Formatter<T>::format(builder, pointee);
    }
};

}

View file

@ -0,0 +1,60 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibPDF/Object.h>
#include <LibPDF/Value.h>
namespace PDF {
// Drops the reference this Value holds when it contains an Object; primitive
// values need no cleanup.
Value::~Value()
{
    if (!is_object())
        return;

    m_as_object->unref();
}
// Copies the type and payload of `other` into this Value, taking an additional
// reference on the Object when `other` holds a non-null one.
// NOTE(review): whatever this Value held before is not released here, so
// overwriting (or self-assigning) a Value that holds an Object never drops the
// old reference. Releasing it here is only safe if every caller — including
// the copy constructor, which delegates to this operator — has initialized
// m_type beforehand.
Value& Value::operator=(const Value& other)
{
    switch (other.m_type) {
    case Type::Null:
        // No payload to copy.
        break;
    case Type::Bool:
        m_as_bool = other.m_as_bool;
        break;
    case Type::Int:
        m_as_int = other.m_as_int;
        break;
    case Type::Float:
        m_as_float = other.m_as_float;
        break;
    case Type::Object:
        if (auto* object = other.m_as_object)
            object->ref();
        m_as_object = other.m_as_object;
        break;
    }

    m_type = other.m_type;
    return *this;
}
// Renders the value as text: "null", "true"/"false", the number, or the
// contained object's own to_string(indent).
String Value::to_string(int indent) const
{
    if (is_null())
        return "null";

    if (is_bool()) {
        if (as_bool())
            return "true";
        return "false";
    }

    if (is_int())
        return String::number(as_int());

    if (is_float())
        return String::number(as_float());

    if (is_object())
        return as_object()->to_string(indent);

    VERIFY_NOT_REACHED();
}
}

View file

@ -0,0 +1,134 @@
/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Format.h>
namespace PDF {
class Object;
// A PDF value: either an inline primitive (null, bool, int, float) or a pointer
// to a reference-counted Object. A Value holding an Object owns one reference
// to it, taken on construction/copy and dropped by the destructor.
class Value {
public:
    // Constructs a null value.
    Value()
        : m_type(Type::Null)
    {
    }

    Value(bool b)
        : m_type(Type::Bool)
    {
        m_as_bool = b;
    }

    Value(int i)
        : m_type(Type::Int)
    {
        m_as_int = i;
    }

    Value(float f)
        : m_type(Type::Float)
    {
        m_as_float = f;
    }

    // Wraps an Object. The Value takes its own reference; the by-value `obj`
    // parameter releases its reference when it goes out of scope.
    template<IsObject T>
    Value(NonnullRefPtr<T> obj)
        : m_type(Type::Object)
    {
        obj->ref();
        m_as_object = obj;
    }

    Value(const Value& other)
        : m_type(Type::Null)
    {
        // Start from a well-defined (null) state so the assignment operator
        // never runs on an object whose m_type is indeterminate.
        *this = other;
    }

    ~Value();

    Value& operator=(const Value& other);

    [[nodiscard]] ALWAYS_INLINE bool is_null() const { return m_type == Type::Null; }
    [[nodiscard]] ALWAYS_INLINE bool is_bool() const { return m_type == Type::Bool; }
    [[nodiscard]] ALWAYS_INLINE bool is_int() const { return m_type == Type::Int; }
    [[nodiscard]] ALWAYS_INLINE bool is_float() const { return m_type == Type::Float; }
    [[nodiscard]] ALWAYS_INLINE bool is_number() const { return is_int() || is_float(); }
    [[nodiscard]] ALWAYS_INLINE bool is_object() const { return m_type == Type::Object; }

    // The as_*() accessors assert (VERIFY) that the value has the requested type.
    [[nodiscard]] ALWAYS_INLINE bool as_bool() const
    {
        VERIFY(is_bool());
        return m_as_bool;
    }

    [[nodiscard]] ALWAYS_INLINE int as_int() const
    {
        VERIFY(is_int());
        return m_as_int;
    }

    // Returns the value as an int, truncating a float; asserts if the value is
    // neither an int nor a float.
    [[nodiscard]] ALWAYS_INLINE int to_int() const
    {
        if (is_int())
            return as_int();
        return static_cast<int>(as_float());
    }

    [[nodiscard]] ALWAYS_INLINE float as_float() const
    {
        VERIFY(is_float());
        return m_as_float;
    }

    // Returns the value as a float, converting an int; asserts if the value is
    // neither a float nor an int.
    [[nodiscard]] ALWAYS_INLINE float to_float() const
    {
        if (is_float())
            return as_float();
        return static_cast<float>(as_int());
    }

    [[nodiscard]] ALWAYS_INLINE NonnullRefPtr<Object> as_object() const
    {
        // Without this check a non-object Value would reinterpret its primitive
        // payload as a pointer and dereference it.
        VERIFY(is_object());
        return *m_as_object;
    }

    // True for every non-null value; note that Value(false) is therefore truthy.
    [[nodiscard]] ALWAYS_INLINE explicit operator bool() const { return !is_null(); }

    [[nodiscard]] String to_string(int indent = 0) const;

private:
    enum class Type {
        Null,
        Bool,
        Int,
        Float,
        Object,
    };

    // Payload; m_type says which member is active.
    union {
        bool m_as_bool;
        int m_as_int;
        float m_as_float;
        Object* m_as_object;
    };

    Type m_type;
};
}
namespace AK {

// Formats a PDF::Value by rendering it with to_string() (default indent) and
// handing the text to the StringView formatter.
template<>
struct Formatter<PDF::Value> : Formatter<StringView> {
    void format(FormatBuilder& builder, const PDF::Value& value)
    {
        const auto rendered = value.to_string();
        Formatter<StringView>::format(builder, rendered);
    }
};

}