From 0624472768c4a65aa70f1a45e4780a360b37eef8 Mon Sep 17 00:00:00 2001 From: Matthew Olsson Date: Tue, 8 Mar 2022 12:08:12 -0700 Subject: [PATCH] LibPDF: Add initial support for Type1 fonts This is enough to get a char code -> code point mapping --- Userland/Libraries/LibPDF/CMakeLists.txt | 1 + Userland/Libraries/LibPDF/CommonNames.h | 11 ++++ Userland/Libraries/LibPDF/Fonts.cpp | 84 ++++++++++++++++++++++++ Userland/Libraries/LibPDF/Fonts.h | 37 +++++++++++ 4 files changed, 133 insertions(+) create mode 100644 Userland/Libraries/LibPDF/Fonts.cpp create mode 100644 Userland/Libraries/LibPDF/Fonts.h diff --git a/Userland/Libraries/LibPDF/CMakeLists.txt b/Userland/Libraries/LibPDF/CMakeLists.txt index 9bb11fa7eb..3b5fa712f0 100644 --- a/Userland/Libraries/LibPDF/CMakeLists.txt +++ b/Userland/Libraries/LibPDF/CMakeLists.txt @@ -4,6 +4,7 @@ set(SOURCES Document.cpp Encoding.cpp Filter.cpp + Fonts.cpp ObjectDerivatives.cpp Parser.cpp Renderer.cpp diff --git a/Userland/Libraries/LibPDF/CommonNames.h b/Userland/Libraries/LibPDF/CommonNames.h index def0ae95ae..f9fd91ac1b 100644 --- a/Userland/Libraries/LibPDF/CommonNames.h +++ b/Userland/Libraries/LibPDF/CommonNames.h @@ -36,11 +36,13 @@ V(DeviceRGB) \ V(Differences) \ V(E) \ + V(Encoding) \ V(ExtGState) \ V(F) \ V(FL) \ V(Filter) \ V(First) \ + V(FirstChar) \ V(Fit) \ V(FitB) \ V(FitBH) \ @@ -50,6 +52,11 @@ V(FitV) \ V(FlateDecode) \ V(Font) \ + V(FontDescriptor) \ + V(FontFamily) \ + V(FontFile1) \ + V(FontFile2) \ + V(FontFile3) \ V(Gamma) \ V(H) \ V(HT) \ @@ -63,6 +70,7 @@ V(LW) \ V(LZWDecode) \ V(Last) \ + V(LastChar) \ V(Length) \ V(Linearized) \ V(ML) \ @@ -87,16 +95,19 @@ V(SA) \ V(SM) \ V(SMask) \ + V(Subtype) \ V(T) \ V(TK) \ V(TR) \ V(TR2) \ V(Title) \ + V(ToUnicode) \ V(Type) \ V(UCR) \ V(UseBlackPTComp) \ V(UserUnit) \ V(WhitePoint) \ + V(Widths) \ V(XYZ) \ V(ca) \ V(op) diff --git a/Userland/Libraries/LibPDF/Fonts.cpp b/Userland/Libraries/LibPDF/Fonts.cpp new file mode 100644 index 0000000000..aa9b20cdf6 --- 
/dev/null
+++ b/Userland/Libraries/LibPDF/Fonts.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022, Matthew Olsson
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include
+#include
+
+namespace PDF {
+
+// Checks `font` against the twelve Times, Helvetica and Courier font names
+// listed below.
+static bool is_standard_latin_font(FlyString const& font)
+{
+    return font.is_one_of(
+        "Times-Roman",
+        "Helvetica",
+        "Courier",
+        "Times-Bold",
+        "Helvetica-Bold",
+        "Courier-Bold",
+        "Times-Italic",
+        "Helvetica-Oblique",
+        "Courier-Oblique",
+        "Times-BoldItalic",
+        "Helvetica-BoldOblique",
+        "Courier-BoldOblique");
+}
+
+// Builds a font from the font dictionary `dict`, dispatching on its Subtype
+// name. Only "Type1" is handled; any other subtype reaches TODO().
+PDFErrorOr> PDFFont::create(Document* document, NonnullRefPtr dict)
+{
+    auto subtype = TRY(dict->get_name(document, CommonNames::Subtype))->name();
+
+    if (subtype == "Type1")
+        return TRY(Type1Font::create(document, dict));
+
+    TODO();
+}
+
+// Creates a Type1Font from the font dictionary `dict`.
+//
+// The encoding comes from the dictionary's Encoding entry when it has one.
+// Otherwise BaseFont is consulted: a name accepted by is_standard_latin_font()
+// selects Encoding::standard_encoding(), and any other name reaches TODO().
+// A ToUnicode stream is kept alongside the encoding if the dictionary has one.
+//
+// Every lookup on `dict` goes through TRY(), so a failed lookup is returned to
+// the caller as an error, the same way a failing Encoding::from_object() is.
+PDFErrorOr> Type1Font::create(Document* document, NonnullRefPtr dict)
+{
+    // FIXME: "Required except for the standard 14 fonts"...
+    // "Beginning with PDF 1.5, the special treatment given to the standard 14
+    // fonts is deprecated. [...] For backwards capability, conforming readers
+    // shall still provide the special treatment identified for the standard
+    // 14 fonts."
+
+    RefPtr encoding;
+
+    if (dict->contains(CommonNames::Encoding)) {
+        auto encoding_object = TRY(dict->get_object(document, CommonNames::Encoding));
+        encoding = TRY(Encoding::from_object(document, encoding_object));
+    } else {
+        auto base_font = TRY(dict->get_name(document, CommonNames::BaseFont))->name();
+        if (is_standard_latin_font(base_font)) {
+            // FIXME: The spec doesn't specify what the encoding should be in this case
+            encoding = Encoding::standard_encoding();
+        } else {
+            TODO();
+        }
+    }
+
+    RefPtr to_unicode;
+    if (dict->contains(CommonNames::ToUnicode))
+        to_unicode = TRY(dict->get_stream(document, CommonNames::ToUnicode));
+
+    return adopt_ref(*new Type1Font(to_unicode, encoding.release_nonnull()));
+}
+
+// Stores the optional ToUnicode stream and the encoding that
+// char_code_to_code_point() consults.
+Type1Font::Type1Font(RefPtr to_unicode, NonnullRefPtr encoding)
+    : m_to_unicode(to_unicode)
+    , m_encoding(encoding)
+{
+}
+
+// Maps `char_code` to a code point via the font's encoding. Fonts that carry a
+// ToUnicode stream are not handled yet and reach TODO().
+u32 Type1Font::char_code_to_code_point(u16 char_code) const
+{
+    if (m_to_unicode)
+        TODO();
+
+    auto descriptor = m_encoding->get_char_code_descriptor(char_code);
+    return descriptor.code_point;
+}
+
+}
diff --git a/Userland/Libraries/LibPDF/Fonts.h b/Userland/Libraries/LibPDF/Fonts.h
new file mode 100644
index 0000000000..51a45e19b2
--- /dev/null
+++ b/Userland/Libraries/LibPDF/Fonts.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022, Matthew Olsson
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+
+namespace PDF {
+
+// Interface shared by the PDF font types: create() builds the concrete font
+// for a font dictionary, and char_code_to_code_point() maps a character code
+// to a code point.
+class PDFFont : public RefCounted {
+public:
+    static PDFErrorOr> create(Document*, NonnullRefPtr);
+
+    virtual ~PDFFont() = default;
+
+    virtual u32 char_code_to_code_point(u16 char_code) const = 0;
+};
+
+// Type 1 font: holds an encoding plus an optional ToUnicode stream.
+class Type1Font : public PDFFont {
+public:
+    static PDFErrorOr> create(Document*, NonnullRefPtr);
+
+    Type1Font(RefPtr to_unicode, NonnullRefPtr);
+    ~Type1Font() override = default;
+
+    u32 char_code_to_code_point(u16 char_code) const override;
+
+private:
+    RefPtr m_to_unicode;
+    NonnullRefPtr m_encoding;
+};
+
+}