From c5c940b1c92cde2686774d83bf897d418978788b Mon Sep 17 00:00:00 2001
From: Nico Weber
Date: Mon, 10 Jul 2023 08:59:03 -0400
Subject: [PATCH] LibPDF: Add accessor for the document's info dict

This dict contains some metadata in some files. Newer files also
contain XMP metadata, but it's recommended to still include this dict
as well, for compatibility with older readers. And it's much less
complex than XMP, so let's support it.
---
 Userland/Libraries/LibPDF/CommonNames.h |  8 +++++
 Userland/Libraries/LibPDF/Document.cpp  | 48 +++++++++++++++++++++++++
 Userland/Libraries/LibPDF/Document.h    | 37 +++++++++++++++++++
 3 files changed, 93 insertions(+)

diff --git a/Userland/Libraries/LibPDF/CommonNames.h b/Userland/Libraries/LibPDF/CommonNames.h
index 33a487733d..6813882ffb 100644
--- a/Userland/Libraries/LibPDF/CommonNames.h
+++ b/Userland/Libraries/LibPDF/CommonNames.h
@@ -13,6 +13,7 @@
     A(Alternate) \
     A(ASCII85Decode) \
     A(ASCIIHexDecode) \
+    A(Author) \
     A(BG) \
     A(BG2) \
     A(BM) \
@@ -31,6 +32,8 @@
     A(Columns) \
     A(Contents) \
     A(Count) \
+    A(CreationDate) \
+    A(Creator) \
     A(CropBox) \
     A(Crypt) \
     A(D) \
@@ -79,8 +82,10 @@
     A(Image) \
     A(ImageMask) \
     A(Index) \
+    A(Info) \
     A(JBIG2Decode) \
     A(JPXDecode) \
+    A(Keywords) \
     A(Kids) \
     A(L) \
     A(LC) \
@@ -99,6 +104,7 @@
     A(Matrix) \
     A(MediaBox) \
     A(MissingWidth) \
+    A(ModDate) \
     A(N) \
     A(Names) \
     A(Next) \
@@ -113,6 +119,7 @@
     A(Pattern) \
     A(Predictor) \
     A(Prev) \
+    A(Producer) \
     A(R) \
     A(RI) \
     A(Registry) \
@@ -123,6 +130,7 @@
     A(SA) \
     A(SM) \
     A(SMask) \
+    A(Subject) \
     A(Subtype) \
     A(Supplement) \
     A(T) \
diff --git a/Userland/Libraries/LibPDF/Document.cpp b/Userland/Libraries/LibPDF/Document.cpp
index 2478e8cdb4..7a657fd885 100644
--- a/Userland/Libraries/LibPDF/Document.cpp
+++ b/Userland/Libraries/LibPDF/Document.cpp
@@ -34,6 +34,46 @@ DeprecatedString OutlineItem::to_deprecated_string(int indent) const
     return builder.to_deprecated_string();
 }
 
+PDFErrorOr<Optional<DeprecatedString>> InfoDict::title() const
+{
+    return get(CommonNames::Title);
+}
+
+PDFErrorOr<Optional<DeprecatedString>> InfoDict::author() const
+{
+    return get(CommonNames::Author);
+}
+
+PDFErrorOr<Optional<DeprecatedString>> InfoDict::subject() const
+{
+    return get(CommonNames::Subject);
+}
+
+PDFErrorOr<Optional<DeprecatedString>> InfoDict::keywords() const
+{
+    return get(CommonNames::Keywords);
+}
+
+PDFErrorOr<Optional<DeprecatedString>> InfoDict::creator() const
+{
+    return get(CommonNames::Creator);
+}
+
+PDFErrorOr<Optional<DeprecatedString>> InfoDict::producer() const
+{
+    return get(CommonNames::Producer);
+}
+
+PDFErrorOr<Optional<DeprecatedString>> InfoDict::creation_date() const
+{
+    return get(CommonNames::CreationDate);
+}
+
+PDFErrorOr<Optional<DeprecatedString>> InfoDict::modification_date() const
+{
+    return get(CommonNames::ModDate);
+}
+
 PDFErrorOr<NonnullRefPtr<Document>> Document::create(ReadonlyBytes bytes)
 {
     auto parser = adopt_ref(*new DocumentParser({}, bytes));
@@ -189,6 +229,14 @@ PDFErrorOr<Value> Document::resolve(Value const& value)
     return value;
 }
 
+PDFErrorOr<Optional<InfoDict>> Document::info_dict()
+{
+    if (!trailer()->contains(CommonNames::Info))
+        return OptionalNone {};
+
+    return InfoDict(this, TRY(trailer()->get_dict(this, CommonNames::Info)));
+}
+
 PDFErrorOr<void> Document::build_page_tree()
 {
     auto page_tree = TRY(m_catalog->get_dict(this, CommonNames::Pages));
diff --git a/Userland/Libraries/LibPDF/Document.h b/Userland/Libraries/LibPDF/Document.h
index d195d7bba0..b3a6649bf4 100644
--- a/Userland/Libraries/LibPDF/Document.h
+++ b/Userland/Libraries/LibPDF/Document.h
@@ -76,6 +76,41 @@ struct OutlineDict final : public RefCounted<OutlineDict> {
     OutlineDict() = default;
 };
 
+class InfoDict {
+public:
+    InfoDict(Document* document, NonnullRefPtr<DictObject> dict)
+        : m_document(document)
+        , m_info_dict(move(dict))
+    {
+    }
+
+    PDFErrorOr<Optional<DeprecatedString>> title() const;
+    PDFErrorOr<Optional<DeprecatedString>> author() const;
+    PDFErrorOr<Optional<DeprecatedString>> subject() const;
+    PDFErrorOr<Optional<DeprecatedString>> keywords() const;
+
+    // Name of the program that created the original, non-PDF file.
+    PDFErrorOr<Optional<DeprecatedString>> creator() const;
+
+    // Name of the program that converted the file to PDF.
+    PDFErrorOr<Optional<DeprecatedString>> producer() const;
+
+    // FIXME: Provide some helper for parsing the date strings returned by these two methods.
+    PDFErrorOr<Optional<DeprecatedString>> creation_date() const;
+    PDFErrorOr<Optional<DeprecatedString>> modification_date() const;
+
+private:
+    PDFErrorOr<Optional<DeprecatedString>> get(DeprecatedFlyString const& name) const
+    {
+        if (!m_info_dict->contains(name))
+            return OptionalNone {};
+        return TRY(m_info_dict->get_string(m_document, name))->string();
+    }
+
+    WeakPtr<Document> m_document;
+    NonnullRefPtr<DictObject> m_info_dict;
+};
+
 class Document final
     : public RefCounted<Document>
     , public Weakable<Document> {
@@ -124,6 +159,8 @@ public:
     /// dict is being read).
     bool can_resolve_references() { return m_parser->can_resolve_references(); }
 
+    PDFErrorOr<Optional<InfoDict>> info_dict();
+
 private:
     explicit Document(NonnullRefPtr<DocumentParser> const& parser);
 