1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-23 12:35:07 +00:00
serenity/Userland/Libraries/LibPDF/Parser.h
Nico Weber 39b2eed3f6 LibPDF: Do not crash on encrypted files that start unluckily
PDF files can be linearized. In that case, they start with a
"linearization dict" that stores the key `/Linearized` and the value
`1`. To check if a file is linearized, we just read the first dict, and
then checked if it has that key.

If the first object of a PDF was a stream with a compression filter
and the input PDF was encrypted and not linearized, then our attempt to
decode the linearization dict could crash: the stream contents are
encrypted, the decryption state is not yet initialized, and we would try
to decompress the stream data before decrypting it.

To prevent this, disable decompression when parsing the first object
to determine if it's a linearization dictionary.

(A linearization dict never stores string values, so decryption
not yet being initialized is not a problem. Integer values aren't
encrypted in encrypted PDF files.)
2023-07-12 06:28:15 +02:00

84 lines
2.3 KiB
C++

/*
* Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/SourceLocation.h>
#include <AK/WeakPtr.h>
#include <LibPDF/Object.h>
#include <LibPDF/Operator.h>
#include <LibPDF/Reader.h>
#include <LibPDF/XRefTable.h>
namespace PDF {
// Heap-allocates a T and returns it wrapped in a NonnullRefPtr<T>.
// Only participates for types T that derive from Object (see the requires-clause).
// The arguments are received by value and handed to T's constructor via forward<Args>.
template<typename T, typename... Args>
static NonnullRefPtr<T> make_object(Args... args)
requires(IsBaseOf<Object, T>)
{
    // adopt_ref takes over the freshly allocated object.
    auto* raw_object = new T(forward<Args>(args)...);
    return adopt_ref(*raw_object);
}
class Document;
// Parser for PDF syntax elements (values, names, strings, arrays, dicts,
// streams, operators). It reads through m_reader and may be associated with a
// Document through a weak pointer. Only declarations are visible here; the
// parse_* definitions live outside this header.
class Parser {
public:
// Static overload: takes a Document pointer and a byte span and yields a
// Vector<Operator> or a PDF error.
static PDFErrorOr<Vector<Operator>> parse_operators(Document*, ReadonlyBytes);
// Construct a parser over the given bytes, without or with a Document.
Parser(ReadonlyBytes);
Parser(Document*, ReadonlyBytes);
// Associates this parser with a document (stored as a WeakPtr, see m_document).
void set_document(WeakPtr<Document> const&);
DeprecatedString parse_comment();
// Cursor control: both calls are forwarded unchanged to m_reader.
void move_by(size_t count) { m_reader.move_by(count); }
void move_to(size_t offset) { m_reader.move_to(offset); }
// Argument type for parse_value(); defaults to Yes there.
// Presumably selects whether parse_value may treat the input as an indirect
// value/reference — confirm against the definition.
enum class CanBeIndirectValue {
No,
Yes
};
PDFErrorOr<Value> parse_value(CanBeIndirectValue = CanBeIndirectValue::Yes);
PDFErrorOr<Value> parse_possible_indirect_value_or_ref();
// Two overloads: one takes the object index and generation explicitly,
// the other takes no arguments.
PDFErrorOr<NonnullRefPtr<IndirectValue>> parse_indirect_value(u32 index, u32 generation);
PDFErrorOr<NonnullRefPtr<IndirectValue>> parse_indirect_value();
PDFErrorOr<Value> parse_number();
PDFErrorOr<NonnullRefPtr<NameObject>> parse_name();
// Note: the string parsers below return their result directly rather than a
// PDFErrorOr, unlike most other parse_* members.
NonnullRefPtr<StringObject> parse_string();
DeprecatedString parse_literal_string();
DeprecatedString parse_hex_string();
PDFErrorOr<NonnullRefPtr<ArrayObject>> parse_array();
PDFErrorOr<NonnullRefPtr<DictObject>> parse_dict();
// Takes the dictionary that belongs to the stream being parsed.
PDFErrorOr<NonnullRefPtr<StreamObject>> parse_stream(NonnullRefPtr<DictObject> dict);
// Non-static overload; operates on this parser's own input.
PDFErrorOr<Vector<Operator>> parse_operators();
// Sets m_enable_filters. Presumably controls whether stream filters
// (e.g. decompression) are applied when parsing streams — the code that reads
// the flag is not visible in this header.
void set_filters_enabled(bool enabled)
{
m_enable_filters = enabled;
}
protected:
// Maintain m_current_reference_stack: push appends, pop removes the last entry
// (the value returned by take_last() is discarded).
void push_reference(Reference const& ref) { m_current_reference_stack.append(ref); }
void pop_reference() { m_current_reference_stack.take_last(); }
// Produces an Error from the given message. When PDF_DEBUG is defined, an
// additional SourceLocation parameter defaults to the caller's location.
Error error(
DeprecatedString const& message
#ifdef PDF_DEBUG
,
SourceLocation loc = SourceLocation::current()
#endif
) const;
// Input cursor used by move_by()/move_to() and, presumably, all parse_* members.
Reader m_reader;
// Weak link to the associated document; set through set_document().
WeakPtr<Document> m_document;
// Stack manipulated by push_reference()/pop_reference().
Vector<Reference> m_current_reference_stack;
// Defaults to enabled; no setter for this flag is declared in this class.
bool m_enable_encryption { true };
// Defaults to disabled; changed only through set_filters_enabled().
// NOTE(review): asymmetric with m_enable_encryption's default of true —
// confirm that callers which need filtered stream data enable this explicitly.
bool m_enable_filters { false };
};
};