From aba787a441a201ace6743f54e84a5d3078e4277f Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 12 Oct 2023 10:53:01 -0400 Subject: [PATCH] LibPDF: Implement reading of CFF String Index Only really useful for reading SIDs in the Top DICT (copyright text etc), which we currently don't do. I haven't seen a difference from looking things up in the string table. The only real effect from the commit that I need is that it pulls a local resolve() lambda into a real function resolve_sid(), which I want to call in a future commit. But it makes things more spec-compliant, and if we ever want to read SIDs in metadata in the future, now we can. --- Userland/Libraries/LibPDF/Fonts/CFF.cpp | 41 +++++++++++++++++-------- Userland/Libraries/LibPDF/Fonts/CFF.h | 5 ++- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.cpp b/Userland/Libraries/LibPDF/Fonts/CFF.cpp index 121a7a2876..2e16efef1a 100644 --- a/Userland/Libraries/LibPDF/Fonts/CFF.cpp +++ b/Userland/Libraries/LibPDF/Fonts/CFF.cpp @@ -104,13 +104,14 @@ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPt }); })); - // FIXME: CFF spec "10 String Index" + auto strings = TRY(parse_strings(reader)); + // FIXME: CFF spec "16 Local/Global Subrs INDEXes" // "Global subrs are stored in an INDEX structure which follows the String INDEX." 
// Create glyphs (now that we have the subroutines) and associate missing information to store them and their encoding auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines)); - auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size())); + auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size(), strings)); // Adjust glyphs' widths as they are deltas from nominalWidthX for (auto& glyph : glyphs) { @@ -385,30 +386,44 @@ static constexpr Array s_cff_builtin_names { "zcaron"sv, }; -PDFErrorOr> CFF::parse_charset(Reader&& reader, size_t glyph_count) +PDFErrorOr> CFF::parse_strings(Reader& reader) +{ + // CFF spec "10 String Index" + Vector strings; + TRY(parse_index(reader, [&](ReadonlyBytes const& string) -> PDFErrorOr { + return TRY(strings.try_append(string)); + })); + return strings; +} + +DeprecatedFlyString CFF::resolve_sid(SID sid, Vector const& strings) +{ + if (sid < s_cff_builtin_names.size()) + return DeprecatedFlyString(s_cff_builtin_names[sid]); + + if (sid - s_cff_builtin_names.size() < strings.size()) + return DeprecatedFlyString(strings[sid - s_cff_builtin_names.size()]); + + dbgln("Couldn't find string for SID {}, going with space", sid); + return DeprecatedFlyString("space"); +} + +PDFErrorOr> CFF::parse_charset(Reader&& reader, size_t glyph_count, Vector const& strings) { // CFF spec, "13 Charsets" Vector names; - auto resolve = [](SID sid) { - if (sid < s_cff_builtin_names.size()) - return DeprecatedFlyString(s_cff_builtin_names[sid]); - // FIXME: Read from String INDEX instead. 
- dbgln("Cound't find string for SID {}, going with space", sid); - return DeprecatedFlyString("space"); - }; - auto format = TRY(reader.try_read()); if (format == 0) { for (u8 i = 0; i < glyph_count - 1; i++) { SID sid = TRY(reader.try_read>()); - TRY(names.try_append(resolve(sid))); + TRY(names.try_append(resolve_sid(sid, strings))); } } else if (format == 1) { while (names.size() < glyph_count - 1) { auto first_sid = TRY(reader.try_read>()); int left = TRY(reader.try_read()); for (u8 sid = first_sid; left >= 0; left--, sid++) - TRY(names.try_append(resolve(sid))); + TRY(names.try_append(resolve_sid(sid, strings))); } } return names; diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.h b/Userland/Libraries/LibPDF/Fonts/CFF.h index ded18e5653..b43310c2ed 100644 --- a/Userland/Libraries/LibPDF/Fonts/CFF.h +++ b/Userland/Libraries/LibPDF/Fonts/CFF.h @@ -85,9 +85,12 @@ public: template static PDFErrorOr parse_dict_operator(u8, Reader&); + static PDFErrorOr> parse_strings(Reader&); + static PDFErrorOr> parse_charstrings(Reader&&, Vector const& subroutines); - static PDFErrorOr> parse_charset(Reader&&, size_t); + static DeprecatedFlyString resolve_sid(SID, Vector const&); + static PDFErrorOr> parse_charset(Reader&&, size_t, Vector const&); static PDFErrorOr> parse_encoding(Reader&&); };