/* * Copyright (c) 2023, Rodrigo Tobar . * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include namespace PDF { PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr encoding) { Reader reader(cff_bytes); // Header // skip major, minor version reader.consume(2); auto header_size = TRY(reader.try_read()); // skip offset size reader.consume(1); reader.move_to(header_size); // Name INDEX Vector font_names; TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr { auto string = TRY(String::from_utf8(data)); return TRY(font_names.try_append(string)); })); auto cff = adopt_ref(*new CFF()); cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f }); // Top DICT INDEX int charset_offset = 0; Vector encoding_codes; auto charstrings_offset = 0; Vector subroutines; int defaultWidthX = 0; int nominalWidthX = 0; TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) { Reader element_reader { element_data }; return parse_dict(element_reader, [&](TopDictOperator op, Vector const& operands) -> PDFErrorOr { switch (op) { case TopDictOperator::Encoding: { auto encoding_offset = 0; if (!operands.is_empty()) encoding_offset = operands[0].get(); encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset)))); break; } case TopDictOperator::Charset: { if (!operands.is_empty()) charset_offset = operands[0].get(); break; } case TopDictOperator::CharStrings: { if (!operands.is_empty()) charstrings_offset = operands[0].get(); break; } case TopDictOperator::Private: { auto private_dict_size = operands[0].get(); auto private_dict_offset = operands[1].get(); Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) }; TRY(parse_dict(priv_dict_reader, [&](PrivDictOperator op, Vector const& operands) -> PDFErrorOr { switch (op) { case PrivDictOperator::Subrs: { auto subrs_offset = operands[0].get(); Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) }; dbgln("Parsing Subrs INDEX"); TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr { return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes)))); })); break; } case PrivDictOperator::DefaultWidthX: defaultWidthX = operands[0].get(); break; case PrivDictOperator::NominalWidthX: nominalWidthX = operands[0].get(); break; } return {}; })); break; } default:; } return {}; }); })); // Create glpyhs (now that we have the subroutines) and associate missing information to store them and their encoding auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines)); auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size())); // Adjust glyphs' widths as they are deltas from nominalWidthX for (auto& glyph : glyphs) { if (!glyph.has_width()) glyph.set_width(float(defaultWidthX)); else glyph.set_width(glyph.width() + float(nominalWidthX)); } for (size_t i = 0; i < glyphs.size(); i++) { if (i == 0) { TRY(cff->add_glyph(0, move(glyphs[0]))); continue; } auto const& name = charset[i - 1]; TRY(cff->add_glyph(name, move(glyphs[i]))); } cff->consolidate_glyphs(); // Encoding given or read if (encoding) { cff->set_encoding(move(encoding)); } else { auto encoding = Encoding::create(); for (size_t i = 0; i < glyphs.size(); i++) { if (i == 0) { encoding->set(0, ".notdef"); continue; } auto code = encoding_codes[i - 1]; auto char_name = charset[i - 1]; encoding->set(code, char_name); } cff->set_encoding(move(encoding)); } return cff; } HashMap CFF::builtin_names { { 0, ".notdef" }, { 1, "space" }, { 9, "parenleft" }, { 10, "parenright" }, { 13, "comma" }, { 14, "hyphen" }, { 15, "period" }, { 17, "zero" }, { 18, "one" }, { 19, "two" }, { 20, "three" }, { 21, "four" }, { 22, "five" }, { 23, "six" }, { 24, "seven" }, { 25, "eight" }, { 26, "nine" }, { 27, "colon" }, { 28, "semicolon" }, { 34, "A" }, { 35, "B" }, { 36, "C" }, { 37, "D" }, { 38, "E" }, { 39, "F" }, { 40, "G" }, { 41, "H" }, { 42, "I" }, { 43, "J" }, { 44, "K" }, { 45, "L" }, { 46, "M" }, { 47, "N" }, { 48, "O" }, { 49, "P" }, { 50, "Q" }, { 51, "R" }, { 52, "S" }, { 53, "T" }, { 54, "U" }, { 55, "V" }, { 56, "W" }, { 57, "X" }, { 58, "Y" }, { 59, "Z" }, { 66, "a" }, { 67, "b" }, { 68, "c" }, { 69, "d" }, { 70, "e" }, { 71, "f" }, { 72, "g" }, { 73, "h" }, { 74, "i" }, { 75, "j" }, { 76, "k" }, { 77, "l" }, { 78, "m" }, { 79, "n" }, { 80, "o" }, { 81, "p" }, { 82, "q" }, { 83, "r" }, { 84, "s" }, { 85, "t" }, { 86, "u" }, { 87, "v" }, { 88, "w" }, { 89, "x" }, { 90, "y" }, { 91, "z" }, { 104, "quotesingle" }, { 105, "quotedblleft" }, { 111, "endash" }, { 116, "bullet" }, { 119, "quotedblright" }, { 137, "emdash" }, { 170, "copyright" }, }; PDFErrorOr> CFF::parse_charset(Reader&& reader, size_t glyph_count) { Vector names; auto resolve = [](SID sid) { auto x = builtin_names.find(sid); if (x == builtin_names.end()) { dbgln("Cound't find string for SID {}, going with space", sid); return DeprecatedFlyString("space"); } return x->value; }; auto format = TRY(reader.try_read()); if (format == 0) { for (u8 i = 0; i < glyph_count - 1; i++) { SID sid = TRY(reader.try_read>()); TRY(names.try_append(resolve(sid))); } } else if (format == 1) { while (names.size() < glyph_count - 1) { auto first_sid = TRY(reader.try_read>()); int left = TRY(reader.try_read()); for (u8 sid = first_sid; left >= 0; left--, sid++) TRY(names.try_append(resolve(sid))); } } return names; } PDFErrorOr> CFF::parse_charstrings(Reader&& reader, Vector const& subroutines) { Vector glyphs; TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr { GlyphParserState state; auto glyph = TRY(parse_glyph(charstring_data, subroutines, state, true)); return TRY(glyphs.try_append(glyph)); })); return glyphs; } PDFErrorOr> CFF::parse_encoding(Reader&& reader) { Vector encoding_codes; auto format = TRY(reader.try_read()); if (format == 0) { auto n_codes = TRY(reader.try_read()); for (u8 i = 0; i < n_codes; i++) { TRY(encoding_codes.try_append(TRY(reader.try_read()))); } } else if (format == 1) { auto n_ranges = TRY(reader.try_read()); for (u8 i = 0; i < n_ranges; i++) { auto first_code = TRY(reader.try_read()); int left = TRY(reader.try_read()); for (u8 code = first_code; left >= 0; left--, code++) TRY(encoding_codes.try_append(code)); } } else return error(DeprecatedString::formatted("Invalid encoding format: {}", format)); return encoding_codes; } template PDFErrorOr CFF::parse_dict(Reader& reader, DictEntryHandler&& handler) { Vector operands; while (reader.remaining() > 0) { auto b0 = reader.read(); // A command if (b0 <= 21) { auto op = TRY(parse_dict_operator(b0, reader)); TRY(handler(op, operands)); operands.clear(); continue; } // An operand TRY(operands.try_append(TRY(load_dict_operand(b0, reader)))); } return {}; } template PDFErrorOr CFF::parse_dict(Reader&, DictEntryHandler&&); template PDFErrorOr CFF::parse_dict(Reader&, DictEntryHandler&&); template PDFErrorOr CFF::parse_dict_operator(u8 b0, Reader& reader) { VERIFY(b0 <= 21); if (b0 != 12) return OperatorT { (int)b0 }; auto b1 = TRY(reader.try_read()); return OperatorT { b0 << 8 | b1 }; } template PDFErrorOr CFF::parse_dict_operator(u8, Reader&); PDFErrorOr CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler) { Card16 count = TRY(reader.try_read>()); if (count == 0) return {}; auto offset_size = TRY(reader.try_read()); if (offset_size == 1) return parse_index_data(count, reader, data_handler); if (offset_size == 2) return parse_index_data(count, reader, data_handler); if (offset_size == 4) return parse_index_data(count, reader, data_handler); VERIFY_NOT_REACHED(); } template PDFErrorOr CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler) { OffsetType last_data_end = 1; auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1; for (u16 i = 0; i < count; i++) { reader.save(); reader.move_by(sizeof(OffsetType) * i); OffsetType data_start = reader.read>(); last_data_end = reader.read>(); auto data_size = last_data_end - data_start; reader.move_to(offset_refpoint + data_start); TRY(handler(reader.bytes().slice(reader.offset(), data_size))); reader.load(); } reader.move_to(offset_refpoint + last_data_end); return {}; } template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); // 4 DICT DATA, Table 3 Operand Encoding int CFF::load_int_dict_operand(u8 b0, Reader& reader) { if (b0 >= 32 && b0 <= 246) { return b0 - 139; } if (b0 >= 247 && b0 <= 250) { auto b1 = reader.read(); return (b0 - 247) * 256 + b1 + 108; } if (b0 >= 251 && b0 <= 254) { auto b1 = reader.read(); return -(b0 - 251) * 256 - b1 - 108; } if (b0 == 28) { auto b1 = reader.read(); auto b2 = reader.read(); return b1 << 8 | b2; } if (b0 == 29) { auto b1 = reader.read(); auto b2 = reader.read(); auto b3 = reader.read(); auto b4 = reader.read(); return b1 << 24 | b2 << 16 | b3 << 8 | b4; } VERIFY_NOT_REACHED(); } float CFF::load_float_dict_operand(Reader& reader) { StringBuilder sb; auto add_nibble = [&](char nibble) { if (nibble < 0xa) sb.append('0' + nibble); else if (nibble == 0xa) sb.append('.'); else if (nibble == 0xb) sb.append('E'); else if (nibble == 0xc) sb.append("E-"sv); else if (nibble == 0xe) sb.append('-'); }; while (true) { auto byte = reader.read(); char nibble1 = (byte & 0xf0) >> 4; char nibble2 = byte & 0x0f; if (nibble1 == 0xf) break; add_nibble(nibble1); if (nibble2 == 0xf) break; add_nibble(nibble2); } auto result = AK::StringUtils::convert_to_floating_point(sb.string_view()); return result.release_value(); } PDFErrorOr CFF::load_dict_operand(u8 b0, Reader& reader) { if (b0 == 30) return load_float_dict_operand(reader); if (b0 >= 28) return load_int_dict_operand(b0, reader); return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) }; } }