From 5420261347163126430d9818452f5e774d7b1fca Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Fri, 6 Jan 2023 00:33:24 +0800 Subject: [PATCH] LibPDF: Implement name tree lookups Name Trees are hierarchical, string-keyed, sorted-by-key dictionary structures in PDF where each node (except the root) specifies the bounds of the keys it holds, and either its kids (more nodes) or the key/value pairs it contains. This commit implements a series of lookup calls for finding a key in such name trees. This implementation follows the tree as needed on each lookup, but if that becomes inefficient in the long run, we can switch to creating a HashMap with all the contents, at the cost of requiring more memory. --- Userland/Libraries/LibPDF/CommonNames.h | 2 ++ Userland/Libraries/LibPDF/Document.cpp | 40 +++++++++++++++++++++++++ Userland/Libraries/LibPDF/Document.h | 4 +++ 3 files changed, 46 insertions(+) diff --git a/Userland/Libraries/LibPDF/CommonNames.h b/Userland/Libraries/LibPDF/CommonNames.h index 85f878c4be..a3832ba2b3 100644 --- a/Userland/Libraries/LibPDF/CommonNames.h +++ b/Userland/Libraries/LibPDF/CommonNames.h @@ -93,12 +93,14 @@ A(Length1) \ A(Length2) \ A(Length3) \ + A(Limits) \ A(Linearized) \ A(ML) \ A(Matrix) \ A(MediaBox) \ A(MissingWidth) \ A(N) \ + A(Names) \ A(Next) \ A(O) \ A(OP) \ diff --git a/Userland/Libraries/LibPDF/Document.cpp b/Userland/Libraries/LibPDF/Document.cpp index a8f85e8a23..acc43ef192 100644 --- a/Userland/Libraries/LibPDF/Document.cpp +++ b/Userland/Libraries/LibPDF/Document.cpp @@ -199,6 +199,46 @@ PDFErrorOr Document::add_page_tree_node_to_page_tree(NonnullRefPtr> Document::find_in_name_tree(NonnullRefPtr tree, FlyString name) +{ + if (tree->contains(CommonNames::Kids)) { + return find_in_name_tree_nodes(tree->get_array(CommonNames::Kids), name); + } + if (!tree->contains(CommonNames::Names)) + return Error { Error::Type::MalformedPDF, "name tree has neither Kids nor Names" }; + auto key_value_names_array = 
TRY(tree->get_array(this, CommonNames::Names)); + return find_in_key_value_array(key_value_names_array, name); +} + +PDFErrorOr> Document::find_in_name_tree_nodes(NonnullRefPtr siblings, FlyString name) +{ + for (size_t i = 0; i < siblings->size(); i++) { + auto sibling = TRY(resolve_to(siblings->at(i))); + auto limits = sibling->get_array(CommonNames::Limits); + if (limits->size() != 2) + return Error { Error::Type::MalformedPDF, "Expected 2-element Limits array" }; + auto start = limits->get_string_at(0); + auto end = limits->get_string_at(1); + if (start->string() <= name && end->string() >= name) { + return find_in_name_tree(sibling, name); + } + } + return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Didn't find node in name tree containing name {}", name) }; +} + +PDFErrorOr> Document::find_in_key_value_array(NonnullRefPtr key_value_array, FlyString name) +{ + if (key_value_array->size() % 2 == 1) + return Error { Error::Type::MalformedPDF, "key/value array has dangling key" }; + for (size_t i = 0; i < key_value_array->size() / 2; i++) { + auto key = key_value_array->get_string_at(2 * i); + if (key->string() == name) { + return key_value_array->get_object_at(this, 2 * i + 1); + } + } + return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Didn't find expected name {} in key/value array", name) }; +} + PDFErrorOr Document::build_outline() { if (!m_catalog->contains(CommonNames::Outlines)) diff --git a/Userland/Libraries/LibPDF/Document.h b/Userland/Libraries/LibPDF/Document.h index 7557f677ea..3b183ceab9 100644 --- a/Userland/Libraries/LibPDF/Document.h +++ b/Userland/Libraries/LibPDF/Document.h @@ -139,6 +139,10 @@ private: PDFErrorOr> get_inheritable_object(FlyString const& name, NonnullRefPtr); + PDFErrorOr> find_in_name_tree(NonnullRefPtr root, FlyString name); + PDFErrorOr> find_in_name_tree_nodes(NonnullRefPtr siblings, FlyString name); + PDFErrorOr> find_in_key_value_array(NonnullRefPtr key_value_array, FlyString 
name); + NonnullRefPtr m_parser; RefPtr m_catalog; RefPtr m_trailer;