From c4507bb56e5ef970abe2d103a6bacbc41b847106 Mon Sep 17 00:00:00 2001
From: Rodrigo Tobar
Date: Sat, 11 Feb 2023 14:11:12 +0800
Subject: [PATCH] LibPDF: Add more built-in SIDs

The first iteration has enough SIDs to display simple documents, but
when trying more and more documents we started to need more of these
SIDs to be properly defined. This is a copy/paste exercise from the CFF
document, which is tedious, so it will continue in small drops.

This commit fills all the gaps until SID 228, which covers all the
ISOAdobe space, and should be enough for most use cases.

Since this is a continuous space starting at 0, we now use an Array
instead of a Map to store these names, which should be more performant.
Also to simplify things I've moved the Array out of the CFF class,
making it a simpler static variable, which allows us to use template
type deduction.
---
 Userland/Libraries/LibPDF/Fonts/CFF.cpp | 328 +++++++++++++++++-------
 Userland/Libraries/LibPDF/Fonts/CFF.h   |   2 -
 2 files changed, 235 insertions(+), 95 deletions(-)

diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.cpp b/Userland/Libraries/LibPDF/Fonts/CFF.cpp
index 9c8ba8fe9c..5307c594e9 100644
--- a/Userland/Libraries/LibPDF/Fonts/CFF.cpp
+++ b/Userland/Libraries/LibPDF/Fonts/CFF.cpp
@@ -138,105 +138,247 @@ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPt
     return cff;
 }
 
-HashMap CFF::builtin_names {
-    { 0, ".notdef" },
-    { 1, "space" },
-    { 9, "parenleft" },
-    { 10, "parenright" },
-    { 13, "comma" },
-    { 14, "hyphen" },
-    { 15, "period" },
-
-    { 17, "zero" },
-    { 18, "one" },
-    { 19, "two" },
-    { 20, "three" },
-    { 21, "four" },
-    { 22, "five" },
-    { 23, "six" },
-    { 24, "seven" },
-    { 25, "eight" },
-    { 26, "nine" },
-    { 27, "colon" },
-    { 28, "semicolon" },
-
-    { 34, "A" },
-    { 35, "B" },
-    { 36, "C" },
-    { 37, "D" },
-    { 38, "E" },
-    { 39, "F" },
-    { 40, "G" },
-    { 41, "H" },
-    { 42, "I" },
-    { 43, "J" },
-    { 44, "K" },
-    { 45, "L" },
-    { 46, "M" },
-    { 47, "N" },
-    { 48, "O" },
-    { 49, "P" },
-    { 50, "Q" },
-    { 51, "R" },
-    { 52, "S" },
-    { 53, "T" },
-    { 54, "U" },
-    { 55, "V" },
-    { 56, "W" },
-    { 57, "X" },
-    { 58, "Y" },
-    { 59, "Z" },
-    { 66, "a" },
-    { 67, "b" },
-    { 68, "c" },
-    { 69, "d" },
-    { 70, "e" },
-    { 71, "f" },
-    { 72, "g" },
-    { 73, "h" },
-    { 74, "i" },
-    { 75, "j" },
-    { 76, "k" },
-    { 77, "l" },
-    { 78, "m" },
-    { 79, "n" },
-    { 80, "o" },
-    { 81, "p" },
-    { 82, "q" },
-    { 83, "r" },
-    { 84, "s" },
-    { 85, "t" },
-    { 86, "u" },
-    { 87, "v" },
-    { 88, "w" },
-    { 89, "x" },
-    { 90, "y" },
-    { 91, "z" },
-
-    { 104, "quotesingle" },
-    { 105, "quotedblleft" },
-
-    { 111, "endash" },
-
-    { 116, "bullet" },
-
-    { 119, "quotedblright" },
-
-    { 137, "emdash" },
-
-    { 170, "copyright" },
+/// SID -> glyph name for SIDs 0-228 (CFF spec, Appendix A: Standard Strings); this range covers the ISOAdobe predefined charset (Appendix C).
+static constexpr Array s_cff_builtin_names {
+    ".notdef"sv,
+    "space"sv,
+    "exclam"sv,
+    "quotedbl"sv,
+    "numbersign"sv,
+    "dollar"sv,
+    "percent"sv,
+    "ampersand"sv,
+    "quoteright"sv,
+    "parenleft"sv,
+    "parenright"sv,
+    "asterisk"sv,
+    "plus"sv,
+    "comma"sv,
+    "hyphen"sv,
+    "period"sv,
+    "slash"sv,
+    "zero"sv,
+    "one"sv,
+    "two"sv,
+    "three"sv,
+    "four"sv,
+    "five"sv,
+    "six"sv,
+    "seven"sv,
+    "eight"sv,
+    "nine"sv,
+    "colon"sv,
+    "semicolon"sv,
+    "less"sv,
+    "equal"sv,
+    "greater"sv,
+    "question"sv,
+    "at"sv,
+    "A"sv,
+    "B"sv,
+    "C"sv,
+    "D"sv,
+    "E"sv,
+    "F"sv,
+    "G"sv,
+    "H"sv,
+    "I"sv,
+    "J"sv,
+    "K"sv,
+    "L"sv,
+    "M"sv,
+    "N"sv,
+    "O"sv,
+    "P"sv,
+    "Q"sv,
+    "R"sv,
+    "S"sv,
+    "T"sv,
+    "U"sv,
+    "V"sv,
+    "W"sv,
+    "X"sv,
+    "Y"sv,
+    "Z"sv,
+    "bracketleft"sv,
+    "backslash"sv,
+    "bracketright"sv,
+    "asciicircum"sv,
+    "underscore"sv,
+    "quoteleft"sv,
+    "a"sv,
+    "b"sv,
+    "c"sv,
+    "d"sv,
+    "e"sv,
+    "f"sv,
+    "g"sv,
+    "h"sv,
+    "i"sv,
+    "j"sv,
+    "k"sv,
+    "l"sv,
+    "m"sv,
+    "n"sv,
+    "o"sv,
+    "p"sv,
+    "q"sv,
+    "r"sv,
+    "s"sv,
+    "t"sv,
+    "u"sv,
+    "v"sv,
+    "w"sv,
+    "x"sv,
+    "y"sv,
+    "z"sv,
+    "braceleft"sv,
+    "bar"sv,
+    "braceright"sv,
+    "asciitilde"sv,
+    "exclamdown"sv,
+    "cent"sv,
+    "sterling"sv,
+    "fraction"sv,
+    "yen"sv,
+    "florin"sv,
+    "section"sv,
+    "currency"sv,
+    "quotesingle"sv,
+    "quotedblleft"sv,
+    "guillemotleft"sv,
+    "guilsinglleft"sv,
+    "guilsinglright"sv,
+    "fi"sv,
+    "fl"sv,
+    "endash"sv,
+    "dagger"sv,
+    "daggerdbl"sv,
+    "periodcentered"sv,
+    "paragraph"sv,
+    "bullet"sv,
+    "quotesinglbase"sv,
+    "quotedblbase"sv,
+    "quotedblright"sv,
+    "guillemotright"sv,
+    "ellipsis"sv,
+    "perthousand"sv,
+    "questiondown"sv,
+    "grave"sv,
+    "acute"sv,
+    "circumflex"sv,
+    "tilde"sv,
+    "macron"sv,
+    "breve"sv,
+    "dotaccent"sv,
+    "dieresis"sv,
+    "ring"sv,
+    "cedilla"sv,
+    "hungarumlaut"sv,
+    "ogonek"sv,
+    "caron"sv,
+    "emdash"sv,
+    "AE"sv,
+    "ordfeminine"sv,
+    "Lslash"sv,
+    "Oslash"sv,
+    "OE"sv,
+    "ordmasculine"sv,
+    "ae"sv,
+    "dotlessi"sv,
+    "lslash"sv,
+    "oslash"sv,
+    "oe"sv,
+    "germandbls"sv,
+    "onesuperior"sv,
+    "logicalnot"sv,
+    "mu"sv,
+    "trademark"sv,
+    "Eth"sv,
+    "onehalf"sv,
+    "plusminus"sv,
+    "Thorn"sv,
+    "onequarter"sv,
+    "divide"sv,
+    "brokenbar"sv,
+    "degree"sv,
+    "thorn"sv,
+    "threequarters"sv,
+    "twosuperior"sv,
+    "registered"sv,
+    "minus"sv,
+    "eth"sv,
+    "multiply"sv,
+    "threesuperior"sv,
+    "copyright"sv,
+    "Aacute"sv,
+    "Acircumflex"sv,
+    "Adieresis"sv,
+    "Agrave"sv,
+    "Aring"sv,
+    "Atilde"sv,
+    "Ccedilla"sv,
+    "Eacute"sv,
+    "Ecircumflex"sv,
+    "Edieresis"sv,
+    "Egrave"sv,
+    "Iacute"sv,
+    "Icircumflex"sv,
+    "Idieresis"sv,
+    "Igrave"sv,
+    "Ntilde"sv,
+    "Oacute"sv,
+    "Ocircumflex"sv,
+    "Odieresis"sv,
+    "Ograve"sv,
+    "Otilde"sv,
+    "Scaron"sv,
+    "Uacute"sv,
+    "Ucircumflex"sv,
+    "Udieresis"sv,
+    "Ugrave"sv,
+    "Yacute"sv,
+    "Ydieresis"sv,
+    "Zcaron"sv,
+    "aacute"sv,
+    "acircumflex"sv,
+    "adieresis"sv,
+    "agrave"sv,
+    "aring"sv,
+    "atilde"sv,
+    "ccedilla"sv,
+    "eacute"sv,
+    "ecircumflex"sv,
+    "edieresis"sv,
+    "egrave"sv,
+    "iacute"sv,
+    "icircumflex"sv,
+    "idieresis"sv,
+    "igrave"sv,
+    "ntilde"sv,
+    "oacute"sv,
+    "ocircumflex"sv,
+    "odieresis"sv,
+    "ograve"sv,
+    "otilde"sv,
+    "scaron"sv,
+    "uacute"sv,
+    "ucircumflex"sv,
+    "udieresis"sv,
+    "ugrave"sv,
+    "yacute"sv,
+    "ydieresis"sv,
+    "zcaron"sv,
 };
 
 PDFErrorOr> CFF::parse_charset(Reader&& reader, size_t glyph_count)
 {
     Vector names;
     auto resolve = [](SID sid) {
-        auto x = builtin_names.find(sid);
-        if (x == builtin_names.end()) {
-            dbgln("Cound't find string for SID {}, going with space", sid);
-            return DeprecatedFlyString("space");
-        }
-        return x->value;
+        if (sid < s_cff_builtin_names.size())
+            return DeprecatedFlyString(s_cff_builtin_names[sid]);
+        dbgln("Couldn't find string for SID {}, going with space", sid);
+        return DeprecatedFlyString("space");
     };
 
     auto format = TRY(reader.try_read());
diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.h b/Userland/Libraries/LibPDF/Fonts/CFF.h
index 7f911ae9ef..60914e9248 100644
--- a/Userland/Libraries/LibPDF/Fonts/CFF.h
+++ b/Userland/Libraries/LibPDF/Fonts/CFF.h
@@ -79,8 +79,6 @@ public:
 
     static PDFErrorOr> parse_charset(Reader&&, size_t);
     static PDFErrorOr> parse_encoding(Reader&&);
-
-    static HashMap builtin_names;
 };
 
 }