/* * Copyright (c) 2023, Rodrigo Tobar . * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include namespace PDF { PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr encoding) { Reader reader(cff_bytes); // Header // skip major, minor version reader.consume(2); auto header_size = TRY(reader.try_read()); // skip offset size reader.consume(1); reader.move_to(header_size); // Name INDEX Vector font_names; TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr { auto string = TRY(String::from_utf8(data)); return TRY(font_names.try_append(string)); })); auto cff = adopt_ref(*new CFF()); cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f }); // Top DICT INDEX int charset_offset = 0; Vector encoding_codes; auto charstrings_offset = 0; Vector subroutines; int defaultWidthX = 0; int nominalWidthX = 0; TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) { Reader element_reader { element_data }; return parse_dict(element_reader, [&](TopDictOperator op, Vector const& operands) -> PDFErrorOr { switch (op) { case TopDictOperator::Encoding: { auto encoding_offset = 0; if (!operands.is_empty()) encoding_offset = operands[0].get(); encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset)))); break; } case TopDictOperator::Charset: { if (!operands.is_empty()) charset_offset = operands[0].get(); break; } case TopDictOperator::CharStrings: { if (!operands.is_empty()) charstrings_offset = operands[0].get(); break; } case TopDictOperator::Private: { auto private_dict_size = operands[0].get(); auto private_dict_offset = operands[1].get(); Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) }; TRY(parse_dict(priv_dict_reader, [&](PrivDictOperator op, Vector const& operands) -> PDFErrorOr { switch (op) { case PrivDictOperator::Subrs: { auto subrs_offset = operands[0].get(); Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) }; dbgln("Parsing Subrs INDEX"); TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr { return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes)))); })); break; } case PrivDictOperator::DefaultWidthX: defaultWidthX = operands[0].get(); break; case PrivDictOperator::NominalWidthX: nominalWidthX = operands[0].get(); break; } return {}; })); break; } default:; } return {}; }); })); // Create glyphs (now that we have the subroutines) and associate missing information to store them and their encoding auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines)); auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size())); // Adjust glyphs' widths as they are deltas from nominalWidthX for (auto& glyph : glyphs) { if (!glyph.has_width()) glyph.set_width(float(defaultWidthX)); else glyph.set_width(glyph.width() + float(nominalWidthX)); } for (size_t i = 0; i < glyphs.size(); i++) { if (i == 0) { TRY(cff->add_glyph(0, move(glyphs[0]))); continue; } auto const& name = charset[i - 1]; TRY(cff->add_glyph(name, move(glyphs[i]))); } cff->consolidate_glyphs(); // Encoding given or read if (encoding) { cff->set_encoding(move(encoding)); } else { auto encoding = Encoding::create(); for (size_t i = 0; i < glyphs.size(); i++) { if (i == 0) { encoding->set(0, ".notdef"); continue; } auto code = encoding_codes[i - 1]; auto char_name = charset[i - 1]; encoding->set(code, char_name); } cff->set_encoding(move(encoding)); } return cff; } /// Appendix C: Predefined Charsets static constexpr Array s_cff_builtin_names { ".notdef"sv, "space"sv, "exclam"sv, "quotedbl"sv, "numbersign"sv, "dollar"sv, "percent"sv, "ampersand"sv, "quoteright"sv, "parenleft"sv, "parenright"sv, "asterisk"sv, "plus"sv, "comma"sv, "hyphen"sv, "period"sv, "slash"sv, "zero"sv, "one"sv, "two"sv, "three"sv, "four"sv, "five"sv, "six"sv, "seven"sv, "eight"sv, "nine"sv, "colon"sv, "semicolon"sv, "less"sv, "equal"sv, "greater"sv, "question"sv, "at"sv, "A"sv, "B"sv, "C"sv, "D"sv, "E"sv, "F"sv, "G"sv, "H"sv, "I"sv, "J"sv, "K"sv, "L"sv, "M"sv, "N"sv, "O"sv, "P"sv, "Q"sv, "R"sv, "S"sv, "T"sv, "U"sv, "V"sv, "W"sv, "X"sv, "Y"sv, "Z"sv, "bracketleft"sv, "backslash"sv, "bracketright"sv, "asciicircum"sv, "underscore"sv, "quoteleft"sv, "a"sv, "b"sv, "c"sv, "d"sv, "e"sv, "f"sv, "g"sv, "h"sv, "i"sv, "j"sv, "k"sv, "l"sv, "m"sv, "n"sv, "o"sv, "p"sv, "q"sv, "r"sv, "s"sv, "t"sv, "u"sv, "v"sv, "w"sv, "x"sv, "y"sv, "z"sv, "braceleft"sv, "bar"sv, "braceright"sv, "asciitilde"sv, "exclamdown"sv, "cent"sv, "sterling"sv, "fraction"sv, "yen"sv, "florin"sv, "section"sv, "currency"sv, "quotesingle"sv, "quotedblleft"sv, "guillemotleft"sv, "guilsinglleft"sv, "guilsinglright"sv, "fi"sv, "fl"sv, "endash"sv, "dagger"sv, "daggerdbl"sv, "periodcentered"sv, "paragraph"sv, "bullet"sv, "quotesinglbase"sv, "quotedblbase"sv, "quotedblright"sv, "guillemotright"sv, "ellipsis"sv, "perthousand"sv, "questiondown"sv, "grave"sv, "acute"sv, "circumflex"sv, "tilde"sv, "macron"sv, "breve"sv, "dotaccent"sv, "dieresis"sv, "ring"sv, "cedilla"sv, "hungarumlaut"sv, "ogonek"sv, "caron"sv, "emdash"sv, "AE"sv, "ordfeminine"sv, "Lslash"sv, "Oslash"sv, "OE"sv, "ordmasculine"sv, "ae"sv, "dotlessi"sv, "lslash"sv, "oslash"sv, "oe"sv, "germandbls"sv, "onesuperior"sv, "logicalnot"sv, "mu"sv, "trademark"sv, "Eth"sv, "onehalf"sv, "plusminus"sv, "Thorn"sv, "onequarter"sv, "divide"sv, "brokenbar"sv, "degree"sv, "thorn"sv, "threequarters"sv, "twosuperior"sv, "registered"sv, "minus"sv, "eth"sv, "multiply"sv, "threesuperior"sv, "copyright"sv, "Aacute"sv, "Acircumflex"sv, "Adieresis"sv, "Agrave"sv, "Aring"sv, "Atilde"sv, "Ccedilla"sv, "Eacute"sv, "Ecircumflex"sv, "Edieresis"sv, "Egrave"sv, "Iacute"sv, "Icircumflex"sv, "Idieresis"sv, "Igrave"sv, "Ntilde"sv, "Oacute"sv, "Ocircumflex"sv, "Odieresis"sv, "Ograve"sv, "Otilde"sv, "Scaron"sv, "Uacute"sv, "Ucircumflex"sv, "Udieresis"sv, "Ugrave"sv, "Yacute"sv, "Ydieresis"sv, "Zcaron"sv, "aacute"sv, "acircumflex"sv, "adieresis"sv, "agrave"sv, "aring"sv, "atilde"sv, "ccedilla"sv, "eacute"sv, "ecircumflex"sv, "edieresis"sv, "egrave"sv, "iacute"sv, "icircumflex"sv, "idieresis"sv, "igrave"sv, "ntilde"sv, "oacute"sv, "ocircumflex"sv, "odieresis"sv, "ograve"sv, "otilde"sv, "scaron"sv, "uacute"sv, "ucircumflex"sv, "udieresis"sv, "ugrave"sv, "yacute"sv, "ydieresis"sv, "zcaron"sv, }; PDFErrorOr> CFF::parse_charset(Reader&& reader, size_t glyph_count) { Vector names; auto resolve = [](SID sid) { if (sid < s_cff_builtin_names.size()) return DeprecatedFlyString(s_cff_builtin_names[sid]); dbgln("Cound't find string for SID {}, going with space", sid); return DeprecatedFlyString("space"); }; auto format = TRY(reader.try_read()); if (format == 0) { for (u8 i = 0; i < glyph_count - 1; i++) { SID sid = TRY(reader.try_read>()); TRY(names.try_append(resolve(sid))); } } else if (format == 1) { while (names.size() < glyph_count - 1) { auto first_sid = TRY(reader.try_read>()); int left = TRY(reader.try_read()); for (u8 sid = first_sid; left >= 0; left--, sid++) TRY(names.try_append(resolve(sid))); } } return names; } PDFErrorOr> CFF::parse_charstrings(Reader&& reader, Vector const& subroutines) { Vector glyphs; TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr { GlyphParserState state; auto glyph = TRY(parse_glyph(charstring_data, subroutines, state, true)); return TRY(glyphs.try_append(glyph)); })); return glyphs; } PDFErrorOr> CFF::parse_encoding(Reader&& reader) { Vector encoding_codes; auto format_raw = TRY(reader.try_read()); // TODO: support encoding supplements when highest bit is set auto format = format_raw & 0x7f; if (format == 0) { auto n_codes = TRY(reader.try_read()); for (u8 i = 0; i < n_codes; i++) { TRY(encoding_codes.try_append(TRY(reader.try_read()))); } } else if (format == 1) { auto n_ranges = TRY(reader.try_read()); for (u8 i = 0; i < n_ranges; i++) { auto first_code = TRY(reader.try_read()); int left = TRY(reader.try_read()); for (u8 code = first_code; left >= 0; left--, code++) TRY(encoding_codes.try_append(code)); } } else return error(DeprecatedString::formatted("Invalid encoding format: {}", format)); return encoding_codes; } template PDFErrorOr CFF::parse_dict(Reader& reader, DictEntryHandler&& handler) { Vector operands; while (reader.remaining() > 0) { auto b0 = reader.read(); // A command if (b0 <= 21) { auto op = TRY(parse_dict_operator(b0, reader)); TRY(handler(op, operands)); operands.clear(); continue; } // An operand TRY(operands.try_append(TRY(load_dict_operand(b0, reader)))); } return {}; } template PDFErrorOr CFF::parse_dict(Reader&, DictEntryHandler&&); template PDFErrorOr CFF::parse_dict(Reader&, DictEntryHandler&&); template PDFErrorOr CFF::parse_dict_operator(u8 b0, Reader& reader) { VERIFY(b0 <= 21); if (b0 != 12) return OperatorT { (int)b0 }; auto b1 = TRY(reader.try_read()); return OperatorT { b0 << 8 | b1 }; } template PDFErrorOr CFF::parse_dict_operator(u8, Reader&); PDFErrorOr CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler) { Card16 count = TRY(reader.try_read>()); if (count == 0) return {}; auto offset_size = TRY(reader.try_read()); if (offset_size == 1) return parse_index_data(count, reader, data_handler); if (offset_size == 2) return parse_index_data(count, reader, data_handler); if (offset_size == 4) return parse_index_data(count, reader, data_handler); VERIFY_NOT_REACHED(); } template PDFErrorOr CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler) { OffsetType last_data_end = 1; auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1; for (u16 i = 0; i < count; i++) { reader.save(); reader.move_by(sizeof(OffsetType) * i); OffsetType data_start = reader.read>(); last_data_end = reader.read>(); auto data_size = last_data_end - data_start; reader.move_to(offset_refpoint + data_start); TRY(handler(reader.bytes().slice(reader.offset(), data_size))); reader.load(); } reader.move_to(offset_refpoint + last_data_end); return {}; } template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); // 4 DICT DATA, Table 3 Operand Encoding int CFF::load_int_dict_operand(u8 b0, Reader& reader) { if (b0 >= 32 && b0 <= 246) { return b0 - 139; } if (b0 >= 247 && b0 <= 250) { auto b1 = reader.read(); return (b0 - 247) * 256 + b1 + 108; } if (b0 >= 251 && b0 <= 254) { auto b1 = reader.read(); return -(b0 - 251) * 256 - b1 - 108; } if (b0 == 28) { auto b1 = reader.read(); auto b2 = reader.read(); return b1 << 8 | b2; } if (b0 == 29) { auto b1 = reader.read(); auto b2 = reader.read(); auto b3 = reader.read(); auto b4 = reader.read(); return b1 << 24 | b2 << 16 | b3 << 8 | b4; } VERIFY_NOT_REACHED(); } float CFF::load_float_dict_operand(Reader& reader) { StringBuilder sb; auto add_nibble = [&](char nibble) { if (nibble < 0xa) sb.append('0' + nibble); else if (nibble == 0xa) sb.append('.'); else if (nibble == 0xb) sb.append('E'); else if (nibble == 0xc) sb.append("E-"sv); else if (nibble == 0xe) sb.append('-'); }; while (true) { auto byte = reader.read(); char nibble1 = (byte & 0xf0) >> 4; char nibble2 = byte & 0x0f; if (nibble1 == 0xf) break; add_nibble(nibble1); if (nibble2 == 0xf) break; add_nibble(nibble2); } auto result = AK::StringUtils::convert_to_floating_point(sb.string_view()); return result.release_value(); } PDFErrorOr CFF::load_dict_operand(u8 b0, Reader& reader) { if (b0 == 30) return load_float_dict_operand(reader); if (b0 >= 28) return load_int_dict_operand(b0, reader); return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) }; } }