1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 04:57:45 +00:00

LibPDF: Add accessor for the document's info dict

This dict contains some metadata in some files.

Newer files also contain XMP metadata, but it's recommended to
still include this dict as well, for compatibility with older readers.
And it's much less complex than XMP, so let's support it.
This commit is contained in:
Nico Weber 2023-07-10 08:59:03 -04:00 committed by Sam Atkins
parent 826c0426f3
commit c5c940b1c9
3 changed files with 93 additions and 0 deletions

View file

@ -13,6 +13,7 @@
A(Alternate) \ A(Alternate) \
A(ASCII85Decode) \ A(ASCII85Decode) \
A(ASCIIHexDecode) \ A(ASCIIHexDecode) \
A(Author) \
A(BG) \ A(BG) \
A(BG2) \ A(BG2) \
A(BM) \ A(BM) \
@ -31,6 +32,8 @@
A(Columns) \ A(Columns) \
A(Contents) \ A(Contents) \
A(Count) \ A(Count) \
A(CreationDate) \
A(Creator) \
A(CropBox) \ A(CropBox) \
A(Crypt) \ A(Crypt) \
A(D) \ A(D) \
@ -79,8 +82,10 @@
A(Image) \ A(Image) \
A(ImageMask) \ A(ImageMask) \
A(Index) \ A(Index) \
A(Info) \
A(JBIG2Decode) \ A(JBIG2Decode) \
A(JPXDecode) \ A(JPXDecode) \
A(Keywords) \
A(Kids) \ A(Kids) \
A(L) \ A(L) \
A(LC) \ A(LC) \
@ -99,6 +104,7 @@
A(Matrix) \ A(Matrix) \
A(MediaBox) \ A(MediaBox) \
A(MissingWidth) \ A(MissingWidth) \
A(ModDate) \
A(N) \ A(N) \
A(Names) \ A(Names) \
A(Next) \ A(Next) \
@ -113,6 +119,7 @@
A(Pattern) \ A(Pattern) \
A(Predictor) \ A(Predictor) \
A(Prev) \ A(Prev) \
A(Producer) \
A(R) \ A(R) \
A(RI) \ A(RI) \
A(Registry) \ A(Registry) \
@ -123,6 +130,7 @@
A(SA) \ A(SA) \
A(SM) \ A(SM) \
A(SMask) \ A(SMask) \
A(Subject) \
A(Subtype) \ A(Subtype) \
A(Supplement) \ A(Supplement) \
A(T) \ A(T) \

View file

@ -34,6 +34,46 @@ DeprecatedString OutlineItem::to_deprecated_string(int indent) const
return builder.to_deprecated_string(); return builder.to_deprecated_string();
} }
// Looks up the Title entry of the info dict.
// Forwards to get(), which yields an empty Optional when the key is absent.
PDFErrorOr<Optional<DeprecatedString>> InfoDict::title() const
{
return get(CommonNames::Title);
}
// Looks up the Author entry of the info dict.
// Forwards to get(), which yields an empty Optional when the key is absent.
PDFErrorOr<Optional<DeprecatedString>> InfoDict::author() const
{
return get(CommonNames::Author);
}
// Looks up the Subject entry of the info dict.
// Forwards to get(), which yields an empty Optional when the key is absent.
PDFErrorOr<Optional<DeprecatedString>> InfoDict::subject() const
{
return get(CommonNames::Subject);
}
// Looks up the Keywords entry of the info dict.
// Forwards to get(), which yields an empty Optional when the key is absent.
PDFErrorOr<Optional<DeprecatedString>> InfoDict::keywords() const
{
return get(CommonNames::Keywords);
}
// Looks up the Creator entry of the info dict (per the class declaration:
// the program that created the original, non-PDF file).
// Forwards to get(), which yields an empty Optional when the key is absent.
PDFErrorOr<Optional<DeprecatedString>> InfoDict::creator() const
{
return get(CommonNames::Creator);
}
// Looks up the Producer entry of the info dict (per the class declaration:
// the program that converted the file to PDF).
// Forwards to get(), which yields an empty Optional when the key is absent.
PDFErrorOr<Optional<DeprecatedString>> InfoDict::producer() const
{
return get(CommonNames::Producer);
}
// Looks up the CreationDate entry of the info dict.
// The value is handed back as the stored string; no date parsing happens here.
// Forwards to get(), which yields an empty Optional when the key is absent.
PDFErrorOr<Optional<DeprecatedString>> InfoDict::creation_date() const
{
return get(CommonNames::CreationDate);
}
// Looks up the ModDate entry of the info dict.
// The value is handed back as the stored string; no date parsing happens here.
// Forwards to get(), which yields an empty Optional when the key is absent.
PDFErrorOr<Optional<DeprecatedString>> InfoDict::modification_date() const
{
return get(CommonNames::ModDate);
}
PDFErrorOr<NonnullRefPtr<Document>> Document::create(ReadonlyBytes bytes) PDFErrorOr<NonnullRefPtr<Document>> Document::create(ReadonlyBytes bytes)
{ {
auto parser = adopt_ref(*new DocumentParser({}, bytes)); auto parser = adopt_ref(*new DocumentParser({}, bytes));
@ -189,6 +229,14 @@ PDFErrorOr<Value> Document::resolve(Value const& value)
return value; return value;
} }
// Builds an InfoDict wrapper around the trailer's Info dictionary.
// Yields an empty Optional when the trailer has no Info key; any error from
// fetching the dictionary is propagated to the caller via TRY.
PDFErrorOr<Optional<InfoDict>> Document::info_dict()
{
    if (trailer()->contains(CommonNames::Info)) {
        auto info = TRY(trailer()->get_dict(this, CommonNames::Info));
        return InfoDict(this, move(info));
    }

    return OptionalNone {};
}
PDFErrorOr<void> Document::build_page_tree() PDFErrorOr<void> Document::build_page_tree()
{ {
auto page_tree = TRY(m_catalog->get_dict(this, CommonNames::Pages)); auto page_tree = TRY(m_catalog->get_dict(this, CommonNames::Pages));

View file

@ -76,6 +76,41 @@ struct OutlineDict final : public RefCounted<OutlineDict> {
OutlineDict() = default; OutlineDict() = default;
}; };
// Read-only view over a document's info dictionary.
// Every accessor yields an empty Optional when its key is missing from the
// dictionary, and propagates any error raised while fetching the string.
class InfoDict {
public:
    InfoDict(Document* document, NonnullRefPtr<DictObject> dict)
        : m_document(document)
        , m_info_dict(move(dict))
    {
    }

    PDFErrorOr<Optional<DeprecatedString>> title() const;
    PDFErrorOr<Optional<DeprecatedString>> author() const;
    PDFErrorOr<Optional<DeprecatedString>> subject() const;
    PDFErrorOr<Optional<DeprecatedString>> keywords() const;

    // The application that produced the original (non-PDF) source file.
    PDFErrorOr<Optional<DeprecatedString>> creator() const;

    // The application that turned that source file into a PDF.
    PDFErrorOr<Optional<DeprecatedString>> producer() const;

    // FIXME: Provide some helper for parsing the date strings returned by these two methods.
    PDFErrorOr<Optional<DeprecatedString>> creation_date() const;
    PDFErrorOr<Optional<DeprecatedString>> modification_date() const;

private:
    // Shared lookup used by the public accessors: fetches the string stored
    // under `name`, or an empty Optional if the dictionary lacks that key.
    PDFErrorOr<Optional<DeprecatedString>> get(DeprecatedFlyString const& name) const
    {
        if (m_info_dict->contains(name)) {
            auto string_object = TRY(m_info_dict->get_string(m_document, name));
            return string_object->string();
        }

        return OptionalNone {};
    }

    // NOTE(review): held as a WeakPtr and handed to get_string() without a
    // liveness check; confirm callers keep the Document alive while this
    // object is in use.
    WeakPtr<Document> m_document;
    NonnullRefPtr<DictObject> m_info_dict;
};
class Document final class Document final
: public RefCounted<Document> : public RefCounted<Document>
, public Weakable<Document> { , public Weakable<Document> {
@ -124,6 +159,8 @@ public:
/// dict is being read). /// dict is being read).
bool can_resolve_references() { return m_parser->can_resolve_references(); } bool can_resolve_references() { return m_parser->can_resolve_references(); }
PDFErrorOr<Optional<InfoDict>> info_dict();
private: private:
explicit Document(NonnullRefPtr<DocumentParser> const& parser); explicit Document(NonnullRefPtr<DocumentParser> const& parser);