1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-10-26 01:02:08 +00:00
serenity/Userland/Libraries/LibPDF/Parser.h
Nico Weber 323d76fbb9 LibPDF: Make encrypted object streams work
There were two problems:
1. parse_compressed_object_with_index() parses indirect objects
   without going through Parser::parse_indirect_value(), so
   push_reference() / pop_reference() weren't called.
   Manually call them, both for the indirect object containing
   the object stream and for the indirect object within the
   object stream.
2. The indirect object within the object stream got decrypted
   twice: Once when the object stream data itself got decrypted,
   and then incorrectly a second time when the object data within
   the stream was read. To fix, disable encryption while parsing
   object stream data (since it's already decrypted).

The test is from http://opf-labs.org/format-corpus/pdfCabinetOfHorrors/
which according to readme.md at the same location is CC0.
2023-07-12 17:16:25 +02:00

89 lines
2.4 KiB
C++

/*
* Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/SourceLocation.h>
#include <AK/WeakPtr.h>
#include <LibPDF/Object.h>
#include <LibPDF/Operator.h>
#include <LibPDF/Reader.h>
#include <LibPDF/XRefTable.h>
namespace PDF {
template<typename T, typename... Args>
static NonnullRefPtr<T> make_object(Args... args)
requires(IsBaseOf<Object, T>)
{
return adopt_ref(*new T(forward<Args>(args)...));
}
class Document;
class Parser {
public:
static PDFErrorOr<Vector<Operator>> parse_operators(Document*, ReadonlyBytes);
Parser(ReadonlyBytes);
Parser(Document*, ReadonlyBytes);
void set_document(WeakPtr<Document> const&);
DeprecatedString parse_comment();
void move_by(size_t count) { m_reader.move_by(count); }
void move_to(size_t offset) { m_reader.move_to(offset); }
enum class CanBeIndirectValue {
No,
Yes
};
PDFErrorOr<Value> parse_value(CanBeIndirectValue = CanBeIndirectValue::Yes);
PDFErrorOr<Value> parse_possible_indirect_value_or_ref();
PDFErrorOr<NonnullRefPtr<IndirectValue>> parse_indirect_value(u32 index, u32 generation);
PDFErrorOr<NonnullRefPtr<IndirectValue>> parse_indirect_value();
PDFErrorOr<Value> parse_number();
PDFErrorOr<NonnullRefPtr<NameObject>> parse_name();
NonnullRefPtr<StringObject> parse_string();
DeprecatedString parse_literal_string();
DeprecatedString parse_hex_string();
PDFErrorOr<NonnullRefPtr<ArrayObject>> parse_array();
PDFErrorOr<NonnullRefPtr<DictObject>> parse_dict();
PDFErrorOr<NonnullRefPtr<StreamObject>> parse_stream(NonnullRefPtr<DictObject> dict);
PDFErrorOr<Vector<Operator>> parse_operators();
void set_filters_enabled(bool enabled)
{
m_enable_filters = enabled;
}
void set_encryption_enabled(bool enabled)
{
m_enable_encryption = enabled;
}
void push_reference(Reference const& ref) { m_current_reference_stack.append(ref); }
void pop_reference() { m_current_reference_stack.take_last(); }
protected:
Error error(
DeprecatedString const& message
#ifdef PDF_DEBUG
,
SourceLocation loc = SourceLocation::current()
#endif
) const;
Reader m_reader;
WeakPtr<Document> m_document;
Vector<Reference> m_current_reference_stack;
bool m_enable_encryption { true };
bool m_enable_filters { false };
};
};