From d451197d3dc192e0b50de86f783a91d7b117fba0 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 12 Oct 2023 10:18:15 -0400 Subject: [PATCH] LibPDF: Add spec comments to CFF --- Userland/Libraries/LibPDF/Encoding.cpp | 1 + Userland/Libraries/LibPDF/Fonts/CFF.cpp | 30 ++++++++++++++++++++----- Userland/Libraries/LibPDF/Fonts/CFF.h | 5 ++++- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Userland/Libraries/LibPDF/Encoding.cpp b/Userland/Libraries/LibPDF/Encoding.cpp index 549c71d4c9..c6d3d7d8cc 100644 --- a/Userland/Libraries/LibPDF/Encoding.cpp +++ b/Userland/Libraries/LibPDF/Encoding.cpp @@ -19,6 +19,7 @@ NonnullRefPtr Encoding::create() PDFErrorOr> Encoding::from_object(Document* document, NonnullRefPtr const& obj) { if (obj->is()) { + // PDF 1.7 spec, 5.5.5 "Character Encoding" auto name = obj->cast()->name(); if (name == "StandardEncoding") return standard_encoding(); diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.cpp b/Userland/Libraries/LibPDF/Fonts/CFF.cpp index 82a1c67d84..ecb73a169a 100644 --- a/Userland/Libraries/LibPDF/Fonts/CFF.cpp +++ b/Userland/Libraries/LibPDF/Fonts/CFF.cpp @@ -4,6 +4,8 @@ * SPDX-License-Identifier: BSD-2-Clause */ +// CFF spec: https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf + #include #include #include @@ -18,7 +20,7 @@ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPt { Reader reader(cff_bytes); - // Header + // CFF spec, "6 Header" // skip major, minor version reader.consume(2); auto header_size = TRY(reader.try_read()); @@ -26,7 +28,7 @@ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPt reader.consume(1); reader.move_to(header_size); - // Name INDEX + // CFF spec, "7 Name INDEX" Vector font_names; TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr { auto string = TRY(String::from_utf8(data)); @@ -36,7 +38,7 @@ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPt auto cff = adopt_ref(*new CFF()); cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f }); - // Top DICT INDEX + // CFF spec, "8 Top DICT INDEX" int charset_offset = 0; Vector encoding_codes; auto charstrings_offset = 0; @@ -71,6 +73,8 @@ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPt TRY(parse_dict(priv_dict_reader, [&](PrivDictOperator op, Vector const& operands) -> PDFErrorOr { switch (op) { case PrivDictOperator::Subrs: { + // CFF spec, "16 Local/Global Subrs INDEXes" + // "Local subrs are stored in an INDEX structure which is located via the offset operand of the Subrs operator in the Private DICT." auto subrs_offset = operands[0].get(); Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) }; dbgln("Parsing Subrs INDEX"); @@ -98,6 +102,10 @@ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPt }); })); + // FIXME: CFF spec "10 String Index" + // FIXME: CFF spec "16 Local/Global Subrs INDEXes" + // "Global subrs are stored in an INDEX structure which follows the String INDEX." + // Create glyphs (now that we have the subroutines) and associate missing information to store them and their encoding auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines)); auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size())); @@ -375,6 +383,7 @@ static constexpr Array s_cff_builtin_names { PDFErrorOr> CFF::parse_charset(Reader&& reader, size_t glyph_count) { + // CFF spec, "13 Charsets" Vector names; auto resolve = [](SID sid) { if (sid < s_cff_builtin_names.size()) @@ -402,6 +411,7 @@ PDFErrorOr> CFF::parse_charset(Reader&& reader, size PDFErrorOr> CFF::parse_charstrings(Reader&& reader, Vector const& subroutines) { + // CFF spec, "14 CharStrings INDEX" Vector glyphs; TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr { GlyphParserState state; @@ -413,6 +423,7 @@ PDFErrorOr> CFF::parse_charstrings(Reader&& reader, Vector> CFF::parse_encoding(Reader&& reader) { + // CFF spec, "12 Encodings" Vector encoding_codes; auto format_raw = TRY(reader.try_read()); // TODO: support encoding supplements when highest bit is set @@ -425,6 +436,7 @@ PDFErrorOr> CFF::parse_encoding(Reader&& reader) } else if (format == 1) { auto n_ranges = TRY(reader.try_read()); for (u8 i = 0; i < n_ranges; i++) { + // CFF spec, "Table 13 Range1 Format (Encoding)" auto first_code = TRY(reader.try_read()); int left = TRY(reader.try_read()); for (u8 code = first_code; left >= 0; left--, code++) @@ -438,10 +450,11 @@ PDFErrorOr> CFF::parse_encoding(Reader&& reader) template PDFErrorOr CFF::parse_dict(Reader& reader, DictEntryHandler&& handler) { + // CFF spec, "4 DICT data" Vector operands; while (reader.remaining() > 0) { auto b0 = reader.read(); - // A command + // "Operators and operands may be distinguished by inspection of their first byte: 0-21 specify operators" if (b0 <= 21) { auto op = TRY(parse_dict_operator(b0, reader)); TRY(handler(op, operands)); @@ -460,7 +473,10 @@ template PDFErrorOr CFF::parse_dict(Reader&, DictEn template PDFErrorOr CFF::parse_dict_operator(u8 b0, Reader& reader) { + // CFF spec, "4 DICT data" VERIFY(b0 <= 21); + + // "Two-byte operators have an initial escape byte of 12." if (b0 != 12) return OperatorT { (int)b0 }; auto b1 = TRY(reader.try_read()); @@ -471,6 +487,7 @@ template PDFErrorOr CFF::parse_dict_operator(u8, Reader&); PDFErrorOr CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler) { + // CFF spec, "5 INDEX Data" Card16 count = TRY(reader.try_read>()); if (count == 0) return {}; @@ -487,6 +504,7 @@ PDFErrorOr CFF::parse_index(Reader& reader, IndexDataHandler&& data_handle template PDFErrorOr CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler) { + // CFF spec, "5 INDEX Data" OffsetType last_data_end = 1; auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1; for (u16 i = 0; i < count; i++) { @@ -507,9 +525,9 @@ template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHa template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); -// 4 DICT DATA, Table 3 Operand Encoding int CFF::load_int_dict_operand(u8 b0, Reader& reader) { + // CFF spec, "Table 3 Operand Encoding" if (b0 >= 32 && b0 <= 246) { return b0 - 139; } @@ -538,6 +556,7 @@ int CFF::load_int_dict_operand(u8 b0, Reader& reader) float CFF::load_float_dict_operand(Reader& reader) { + // CFF spec, "Table 5 Nibble Definitions" StringBuilder sb; auto add_nibble = [&](char nibble) { if (nibble < 0xa) @@ -568,6 +587,7 @@ float CFF::load_float_dict_operand(Reader& reader) PDFErrorOr CFF::load_dict_operand(u8 b0, Reader& reader) { + // CFF spec, "4 DICT data" if (b0 == 30) return load_float_dict_operand(reader); if (b0 >= 28) diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.h b/Userland/Libraries/LibPDF/Fonts/CFF.h index 2e57b37594..ded18e5653 100644 --- a/Userland/Libraries/LibPDF/Fonts/CFF.h +++ b/Userland/Libraries/LibPDF/Fonts/CFF.h @@ -15,10 +15,12 @@ namespace PDF { class Reader; +// CFF spec: https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf + class CFF : public Type1FontProgram { private: - // Table 9: Top DICT Operator Entries + // CFF spec, "Table 9 Top DICT Operator Entries" enum class TopDictOperator { Version = 0, Notice, @@ -39,6 +41,7 @@ private: // PaintType, }; + // CFF spec, "Table 23 Private DICT Operators" enum class PrivDictOperator { Subrs = 19, DefaultWidthX,