1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 23:28:11 +00:00

LibPDF: Harden the document/parser against errors

This commit is contained in:
Matthew Olsson 2021-05-24 13:57:16 -07:00 committed by Ali Mohammad Pur
parent d654fe0e41
commit 1ef5071d1b
6 changed files with 217 additions and 108 deletions

View file

@ -34,20 +34,32 @@ String OutlineItem::to_string(int indent) const
return builder.to_string();
}
Document::Document(const ReadonlyBytes& bytes)
: m_parser(Parser({}, bytes))
RefPtr<Document> Document::create(const ReadonlyBytes& bytes)
{
m_parser.set_document(this);
auto parser = adopt_ref(*new Parser({}, bytes));
auto document = adopt_ref(*new Document(parser));
VERIFY(m_parser.perform_validation());
auto [xref_table, trailer] = m_parser.parse_last_xref_table_and_trailer();
VERIFY(parser->perform_validation());
auto xref_table_and_trailer_opt = parser->parse_last_xref_table_and_trailer();
if (!xref_table_and_trailer_opt.has_value())
return {};
m_xref_table = xref_table;
m_trailer = trailer;
auto [xref_table, trailer] = xref_table_and_trailer_opt.value();
m_catalog = m_trailer->get_dict(this, CommonNames::Root);
build_page_tree();
build_outline();
document->m_xref_table = xref_table;
document->m_trailer = trailer;
document->m_catalog = document->m_trailer->get_dict(document, CommonNames::Root);
document->build_page_tree();
document->build_outline();
return document;
}
Document::Document(const NonnullRefPtr<Parser>& parser)
: m_parser(parser)
{
m_parser->set_document(this);
}
Value Document::get_or_load_value(u32 index)
@ -58,7 +70,7 @@ Value Document::get_or_load_value(u32 index)
VERIFY(m_xref_table.has_object(index));
auto byte_offset = m_xref_table.byte_offset_for_object(index);
auto indirect_value = m_parser.parse_indirect_value_at_offset(byte_offset);
auto indirect_value = m_parser->parse_indirect_value_at_offset(byte_offset);
VERIFY(indirect_value->index() == index);
value = indirect_value->value();
m_values.set(index, value);
@ -145,14 +157,19 @@ Value Document::resolve(const Value& value)
return obj;
}
void Document::build_page_tree()
bool Document::build_page_tree()
{
if (!m_catalog->contains(CommonNames::Pages))
return false;
auto page_tree = m_catalog->get_dict(this, CommonNames::Pages);
add_page_tree_node_to_page_tree(page_tree);
return add_page_tree_node_to_page_tree(page_tree);
}
void Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> page_tree)
bool Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> page_tree)
{
if (!page_tree->contains(CommonNames::Kids) || !page_tree->contains(CommonNames::Count))
return false;
auto kids_array = page_tree->get_array(this, CommonNames::Kids);
auto page_count = page_tree->get(CommonNames::Count).value().as_int();
@ -163,20 +180,24 @@ void Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> page_tr
for (auto& value : *kids_array) {
auto reference_index = value.as_ref_index();
auto byte_offset = m_xref_table.byte_offset_for_object(reference_index);
auto maybe_page_tree_node = m_parser.conditionally_parse_page_tree_node_at_offset(byte_offset);
bool ok;
auto maybe_page_tree_node = m_parser->conditionally_parse_page_tree_node_at_offset(byte_offset, ok);
if (!ok)
return false;
if (maybe_page_tree_node) {
add_page_tree_node_to_page_tree(maybe_page_tree_node.release_nonnull());
if (!add_page_tree_node_to_page_tree(maybe_page_tree_node.release_nonnull()))
return false;
} else {
m_page_object_indices.append(reference_index);
}
}
return;
} else {
// We know all of the kids are leaf nodes
for (auto& value : *kids_array)
m_page_object_indices.append(value.as_ref_index());
}
// We know all of the kids are leaf nodes
for (auto& value : *kids_array)
m_page_object_indices.append(value.as_ref_index());
return true;
}
void Document::build_outline()
@ -187,8 +208,8 @@ void Document::build_outline()
auto outline_dict = m_catalog->get_dict(this, CommonNames::Outlines);
if (!outline_dict->contains(CommonNames::First))
return;
VERIFY(outline_dict->contains(CommonNames::Last));
if (!outline_dict->contains(CommonNames::Last))
return;
auto first_ref = outline_dict->get_value(CommonNames::First);
auto last_ref = outline_dict->get_value(CommonNames::Last);