From 8e3c54f203318bab2921fb20ff1d6efd6d6c025b Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 28 Feb 2024 17:36:51 -0500 Subject: [PATCH] LibPDF: Implement ZapfDingbats clause of the adobe glyph list algorithm Liberation Sans still doesn't have the vast majority of the Zapf Dingbats glyphs, but now we map the Zapf Dingbats names to good unicode values. So we only need to use a different font and all should work. (And Liberation Sans has _some_ of the glyphs, like 13 of the 223.) And we now render empty squares instead of wrong glyphs for the ones we don't have. I haven't seen any PDFs using ZapfDingbats in the wild, but they probably exist somewhere. (Tests/LibPDF/standard-14-fonts.pdf is a synthetic PDF using it.) --- .../Libraries/LibPDF/Fonts/AdobeGlyphList.cpp | 244 ++++++++++++++++-- .../Libraries/LibPDF/Fonts/AdobeGlyphList.h | 2 +- .../Libraries/LibPDF/Fonts/TrueTypeFont.cpp | 16 +- .../Libraries/LibPDF/Fonts/TrueTypeFont.h | 5 +- Userland/Libraries/LibPDF/Fonts/Type1Font.cpp | 7 +- 5 files changed, 242 insertions(+), 32 deletions(-) diff --git a/Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.cpp b/Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.cpp index 0f2a8dd58c..c9057e9343 100644 --- a/Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.cpp +++ b/Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.cpp @@ -12,23 +12,24 @@ Contains the Adobe Glyph List, transformed into C++ via this script: import sys -print(f'static HashMap constexpr glyph_list = {{') -for line in open('glyphlist.txt'): - line = line.strip() - if line.startswith('#'): - continue +def write_table(source_filename, table_name): + print(f'static HashMap constexpr {table_name} = {{') + for line in open(source_filename): + line = line.strip() + if line.startswith('#'): + continue + name, codepoint = line.split(';') + if ' ' in codepoint: + print(f'skipping {name}, multiple codepoints', file=sys.stderr) + continue + print(f' {{ "{name}"sv, 0x{codepoint} }},') + print(f'}};') - name, codepoint = 
line.split(';') - - if ' ' in codepoint: - print(f'skipping {name}, multiple codepoints', file=sys.stderr) - continue - - print(f' {{ "{name}"sv, 0x{codepoint} }},') -print(f'}};') +write_table('glyphlist.txt', 'glyph_list') +write_table('zapfdingbats.txt', 'zapf_dingbats_list') ``` -where glyphlist.txt is from https://github.com/adobe-type-tools/agl-aglfn/blob/master/glyphlist.txt +where glyphlist.txt and zapfdingbats.txt are from https://github.com/adobe-type-tools/agl-aglfn/ */ #include @@ -4240,6 +4241,209 @@ static HashMap const glyph_list = { { "zuhiragana"sv, 0x305A }, { "zukatakana"sv, 0x30BA }, }; +static HashMap const zapf_dingbats_list = { + { "a100"sv, 0x275E }, + { "a101"sv, 0x2761 }, + { "a102"sv, 0x2762 }, + { "a103"sv, 0x2763 }, + { "a104"sv, 0x2764 }, + { "a105"sv, 0x2710 }, + { "a106"sv, 0x2765 }, + { "a107"sv, 0x2766 }, + { "a108"sv, 0x2767 }, + { "a109"sv, 0x2660 }, + { "a10"sv, 0x2721 }, + { "a110"sv, 0x2665 }, + { "a111"sv, 0x2666 }, + { "a112"sv, 0x2663 }, + { "a117"sv, 0x2709 }, + { "a118"sv, 0x2708 }, + { "a119"sv, 0x2707 }, + { "a11"sv, 0x261B }, + { "a120"sv, 0x2460 }, + { "a121"sv, 0x2461 }, + { "a122"sv, 0x2462 }, + { "a123"sv, 0x2463 }, + { "a124"sv, 0x2464 }, + { "a125"sv, 0x2465 }, + { "a126"sv, 0x2466 }, + { "a127"sv, 0x2467 }, + { "a128"sv, 0x2468 }, + { "a129"sv, 0x2469 }, + { "a12"sv, 0x261E }, + { "a130"sv, 0x2776 }, + { "a131"sv, 0x2777 }, + { "a132"sv, 0x2778 }, + { "a133"sv, 0x2779 }, + { "a134"sv, 0x277A }, + { "a135"sv, 0x277B }, + { "a136"sv, 0x277C }, + { "a137"sv, 0x277D }, + { "a138"sv, 0x277E }, + { "a139"sv, 0x277F }, + { "a13"sv, 0x270C }, + { "a140"sv, 0x2780 }, + { "a141"sv, 0x2781 }, + { "a142"sv, 0x2782 }, + { "a143"sv, 0x2783 }, + { "a144"sv, 0x2784 }, + { "a145"sv, 0x2785 }, + { "a146"sv, 0x2786 }, + { "a147"sv, 0x2787 }, + { "a148"sv, 0x2788 }, + { "a149"sv, 0x2789 }, + { "a14"sv, 0x270D }, + { "a150"sv, 0x278A }, + { "a151"sv, 0x278B }, + { "a152"sv, 0x278C }, + { "a153"sv, 0x278D }, 
+ { "a154"sv, 0x278E }, + { "a155"sv, 0x278F }, + { "a156"sv, 0x2790 }, + { "a157"sv, 0x2791 }, + { "a158"sv, 0x2792 }, + { "a159"sv, 0x2793 }, + { "a15"sv, 0x270E }, + { "a160"sv, 0x2794 }, + { "a161"sv, 0x2192 }, + { "a162"sv, 0x27A3 }, + { "a163"sv, 0x2194 }, + { "a164"sv, 0x2195 }, + { "a165"sv, 0x2799 }, + { "a166"sv, 0x279B }, + { "a167"sv, 0x279C }, + { "a168"sv, 0x279D }, + { "a169"sv, 0x279E }, + { "a16"sv, 0x270F }, + { "a170"sv, 0x279F }, + { "a171"sv, 0x27A0 }, + { "a172"sv, 0x27A1 }, + { "a173"sv, 0x27A2 }, + { "a174"sv, 0x27A4 }, + { "a175"sv, 0x27A5 }, + { "a176"sv, 0x27A6 }, + { "a177"sv, 0x27A7 }, + { "a178"sv, 0x27A8 }, + { "a179"sv, 0x27A9 }, + { "a17"sv, 0x2711 }, + { "a180"sv, 0x27AB }, + { "a181"sv, 0x27AD }, + { "a182"sv, 0x27AF }, + { "a183"sv, 0x27B2 }, + { "a184"sv, 0x27B3 }, + { "a185"sv, 0x27B5 }, + { "a186"sv, 0x27B8 }, + { "a187"sv, 0x27BA }, + { "a188"sv, 0x27BB }, + { "a189"sv, 0x27BC }, + { "a18"sv, 0x2712 }, + { "a190"sv, 0x27BD }, + { "a191"sv, 0x27BE }, + { "a192"sv, 0x279A }, + { "a193"sv, 0x27AA }, + { "a194"sv, 0x27B6 }, + { "a195"sv, 0x27B9 }, + { "a196"sv, 0x2798 }, + { "a197"sv, 0x27B4 }, + { "a198"sv, 0x27B7 }, + { "a199"sv, 0x27AC }, + { "a19"sv, 0x2713 }, + { "a1"sv, 0x2701 }, + { "a200"sv, 0x27AE }, + { "a201"sv, 0x27B1 }, + { "a202"sv, 0x2703 }, + { "a203"sv, 0x2750 }, + { "a204"sv, 0x2752 }, + { "a205"sv, 0x276E }, + { "a206"sv, 0x2770 }, + { "a20"sv, 0x2714 }, + { "a21"sv, 0x2715 }, + { "a22"sv, 0x2716 }, + { "a23"sv, 0x2717 }, + { "a24"sv, 0x2718 }, + { "a25"sv, 0x2719 }, + { "a26"sv, 0x271A }, + { "a27"sv, 0x271B }, + { "a28"sv, 0x271C }, + { "a29"sv, 0x2722 }, + { "a2"sv, 0x2702 }, + { "a30"sv, 0x2723 }, + { "a31"sv, 0x2724 }, + { "a32"sv, 0x2725 }, + { "a33"sv, 0x2726 }, + { "a34"sv, 0x2727 }, + { "a35"sv, 0x2605 }, + { "a36"sv, 0x2729 }, + { "a37"sv, 0x272A }, + { "a38"sv, 0x272B }, + { "a39"sv, 0x272C }, + { "a3"sv, 0x2704 }, + { "a40"sv, 0x272D }, + { "a41"sv, 0x272E }, + { "a42"sv, 0x272F }, + { "a43"sv, 
0x2730 }, + { "a44"sv, 0x2731 }, + { "a45"sv, 0x2732 }, + { "a46"sv, 0x2733 }, + { "a47"sv, 0x2734 }, + { "a48"sv, 0x2735 }, + { "a49"sv, 0x2736 }, + { "a4"sv, 0x260E }, + { "a50"sv, 0x2737 }, + { "a51"sv, 0x2738 }, + { "a52"sv, 0x2739 }, + { "a53"sv, 0x273A }, + { "a54"sv, 0x273B }, + { "a55"sv, 0x273C }, + { "a56"sv, 0x273D }, + { "a57"sv, 0x273E }, + { "a58"sv, 0x273F }, + { "a59"sv, 0x2740 }, + { "a5"sv, 0x2706 }, + { "a60"sv, 0x2741 }, + { "a61"sv, 0x2742 }, + { "a62"sv, 0x2743 }, + { "a63"sv, 0x2744 }, + { "a64"sv, 0x2745 }, + { "a65"sv, 0x2746 }, + { "a66"sv, 0x2747 }, + { "a67"sv, 0x2748 }, + { "a68"sv, 0x2749 }, + { "a69"sv, 0x274A }, + { "a6"sv, 0x271D }, + { "a70"sv, 0x274B }, + { "a71"sv, 0x25CF }, + { "a72"sv, 0x274D }, + { "a73"sv, 0x25A0 }, + { "a74"sv, 0x274F }, + { "a75"sv, 0x2751 }, + { "a76"sv, 0x25B2 }, + { "a77"sv, 0x25BC }, + { "a78"sv, 0x25C6 }, + { "a79"sv, 0x2756 }, + { "a7"sv, 0x271E }, + { "a81"sv, 0x25D7 }, + { "a82"sv, 0x2758 }, + { "a83"sv, 0x2759 }, + { "a84"sv, 0x275A }, + { "a85"sv, 0x276F }, + { "a86"sv, 0x2771 }, + { "a87"sv, 0x2772 }, + { "a88"sv, 0x2773 }, + { "a89"sv, 0x2768 }, + { "a8"sv, 0x271F }, + { "a90"sv, 0x2769 }, + { "a91"sv, 0x276C }, + { "a92"sv, 0x276D }, + { "a93"sv, 0x276A }, + { "a94"sv, 0x276B }, + { "a95"sv, 0x2774 }, + { "a96"sv, 0x2775 }, + { "a97"sv, 0x275B }, + { "a98"sv, 0x275C }, + { "a99"sv, 0x275D }, + { "a9"sv, 0x2720 }, +}; static bool are_all_uppercase_hex(StringView component) { @@ -4259,7 +4463,7 @@ static u32 decode_hex(StringView hex_string) return code_point; } -Optional glyph_name_to_unicode(StringView name) +Optional glyph_name_to_unicode(StringView name, bool is_zapf_dingbats) { // https://github.com/adobe-type-tools/agl-specification?tab=readme-ov-file#2-the-mapping // "To map a glyph name to a character string, follow the three steps below: @@ -4277,10 +4481,14 @@ Optional glyph_name_to_unicode(StringView name) // 3. 
Map each component to a character string according to the procedure below, and concatenate those strings; the result is the character string to which the glyph name is mapped. StringView component = name; - // If the font is Zapf Dingbats (PostScript FontName: ZapfDingbats), and the component is in the ITC Zapf Dingbats Glyph List, then map it to the corresponding character in that list." - // FIXME: Implement. + // If the font is Zapf Dingbats (PostScript FontName: ZapfDingbats), and the component is in the ITC Zapf Dingbats Glyph List, then map it to the corresponding character in that list. + if (is_zapf_dingbats) { + auto zapf_dingbats_entry = zapf_dingbats_list.get(component); + if (zapf_dingbats_entry.has_value()) + return zapf_dingbats_entry.value(); + } - // "Otherwise, if the component is in AGL, then map it to the corresponding character in that list. + // Otherwise, if the component is in AGL, then map it to the corresponding character in that list. auto agl_entry = glyph_list.get(component); if (agl_entry.has_value()) return agl_entry.value(); diff --git a/Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.h b/Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.h index c1e3993209..bc8ed530f3 100644 --- a/Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.h +++ b/Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.h @@ -10,6 +10,6 @@ namespace PDF { -Optional glyph_name_to_unicode(StringView); +Optional glyph_name_to_unicode(StringView, bool is_zapf_dingbats); } diff --git a/Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp b/Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp index fb5c5d0f68..7bc4d3d673 100644 --- a/Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp +++ b/Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp @@ -15,16 +15,17 @@ namespace PDF { -TrueTypePainter::TrueTypePainter(AK::NonnullRefPtr font, NonnullRefPtr encoding, bool encoding_is_mac_roman_or_win_ansi, bool is_nonsymbolic, Optional high_byte) +TrueTypePainter::TrueTypePainter(AK::NonnullRefPtr font, 
NonnullRefPtr encoding, bool encoding_is_mac_roman_or_win_ansi, bool is_nonsymbolic, Optional high_byte, bool is_zapf_dingbats) : m_font(move(font)) , m_encoding(move(encoding)) , m_encoding_is_mac_roman_or_win_ansi(encoding_is_mac_roman_or_win_ansi) , m_is_nonsymbolic(is_nonsymbolic) , m_high_byte(high_byte) + , m_is_zapf_dingbats(is_zapf_dingbats) { } -NonnullOwnPtr TrueTypePainter::create(Document* document, NonnullRefPtr const& dict, SimpleFont const& containing_pdf_font, AK::NonnullRefPtr font, NonnullRefPtr encoding) +NonnullOwnPtr TrueTypePainter::create(Document* document, NonnullRefPtr const& dict, SimpleFont const& containing_pdf_font, AK::NonnullRefPtr font, NonnullRefPtr encoding, bool is_zapf_dingbats) { bool encoding_is_mac_roman_or_win_ansi = false; if (dict->contains(CommonNames::Encoding)) { @@ -55,7 +56,7 @@ NonnullOwnPtr TrueTypePainter::create(Document* document, Nonnu } } - return adopt_own(*new TrueTypePainter { move(font), move(encoding), encoding_is_mac_roman_or_win_ansi, containing_pdf_font.is_nonsymbolic(), high_byte }); + return adopt_own(*new TrueTypePainter { move(font), move(encoding), encoding_is_mac_roman_or_win_ansi, containing_pdf_font.is_nonsymbolic(), high_byte, is_zapf_dingbats }); } static void do_draw_glyph(Gfx::Painter& painter, Gfx::FloatPoint point, float width, u32 unicode, Gfx::Font const& font, ColorOrStyle const& style) @@ -106,7 +107,7 @@ PDFErrorOr TrueTypePainter::draw_glyph(Gfx::Painter& painter, Gfx::FloatPo // use the (3, 1) algorithm. // FIXME: Implement (1, 0) subtable support. 
auto char_name = m_encoding->get_name(char_code); - u32 unicode = glyph_name_to_unicode(char_name).value_or(char_code); + u32 unicode = glyph_name_to_unicode(char_name, m_is_zapf_dingbats).value_or(char_code); if (m_font->contains_glyph(unicode)) { do_draw_glyph(painter, point, width, unicode, *m_font, style); return {}; @@ -132,7 +133,7 @@ PDFErrorOr TrueTypePainter::draw_glyph(Gfx::Painter& painter, Gfx::FloatPo // "If a character cannot be mapped in any of the ways described above, the results are implementation-dependent." // FIXME: Do something smarter? auto char_name = m_encoding->get_name(char_code); - unicode = glyph_name_to_unicode(char_name).value_or(char_code); + unicode = glyph_name_to_unicode(char_name, m_is_zapf_dingbats).value_or(char_code); } do_draw_glyph(painter, point, width, unicode, *m_font, style); @@ -143,7 +144,7 @@ Optional TrueTypePainter::get_glyph_width(u8 char_code) const { // FIXME: Make this use the full char_code lookup method used in draw_glyph() once that's complete. 
auto char_name = m_encoding->get_name(char_code); - u32 unicode = glyph_name_to_unicode(char_name).value_or(char_code); + u32 unicode = glyph_name_to_unicode(char_name, m_is_zapf_dingbats).value_or(char_code); return m_font->glyph_width(unicode); } @@ -177,7 +178,8 @@ PDFErrorOr TrueTypeFont::initialize(Document* document, NonnullRefPtr create(Document*, NonnullRefPtr const&, SimpleFont const& containing_pdf_font, AK::NonnullRefPtr, NonnullRefPtr); + static NonnullOwnPtr create(Document*, NonnullRefPtr const&, SimpleFont const& containing_pdf_font, AK::NonnullRefPtr, NonnullRefPtr, bool is_zapf_dingbats); PDFErrorOr draw_glyph(Gfx::Painter&, Gfx::FloatPoint, float width, u8 char_code, Renderer const&); Optional get_glyph_width(u8 char_code) const; void set_font_size(float font_size); private: - TrueTypePainter(AK::NonnullRefPtr, NonnullRefPtr, bool encoding_is_mac_roman_or_win_ansi, bool is_nonsymbolic, Optional high_byte); + TrueTypePainter(AK::NonnullRefPtr, NonnullRefPtr, bool encoding_is_mac_roman_or_win_ansi, bool is_nonsymbolic, Optional high_byte, bool is_zapf_dingbats); NonnullRefPtr m_font; NonnullRefPtr m_encoding; bool m_encoding_is_mac_roman_or_win_ansi { false }; bool m_is_nonsymbolic { false }; Optional m_high_byte; + bool m_is_zapf_dingbats { false }; }; class TrueTypeFont : public SimpleFont { diff --git a/Userland/Libraries/LibPDF/Fonts/Type1Font.cpp b/Userland/Libraries/LibPDF/Fonts/Type1Font.cpp index 82913c0fff..8f290c65d7 100644 --- a/Userland/Libraries/LibPDF/Fonts/Type1Font.cpp +++ b/Userland/Libraries/LibPDF/Fonts/Type1Font.cpp @@ -63,9 +63,8 @@ PDFErrorOr Type1Font::initialize(Document* document, NonnullRefPtr Type1Font::initialize(Document* document, NonnullRefPtr