diff --git a/Userland/Libraries/LibPDF/CMakeLists.txt b/Userland/Libraries/LibPDF/CMakeLists.txt index dc34aca20e..955d3b7c37 100644 --- a/Userland/Libraries/LibPDF/CMakeLists.txt +++ b/Userland/Libraries/LibPDF/CMakeLists.txt @@ -6,6 +6,7 @@ set(SOURCES Encoding.cpp Encryption.cpp Filter.cpp + Fonts/CFF.cpp Fonts/PDFFont.cpp Fonts/PS1FontProgram.cpp Fonts/TrueTypeFont.cpp diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.cpp b/Userland/Libraries/LibPDF/Fonts/CFF.cpp new file mode 100644 index 0000000000..db7dce8343 --- /dev/null +++ b/Userland/Libraries/LibPDF/Fonts/CFF.cpp @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2023, Rodrigo Tobar . + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include + +namespace PDF { + +/* Builds a CFF font program from cff_bytes. Skips the header, reads the Name INDEX, then walks the Top DICT INDEX to collect the encoding, the charset and CharStrings offsets and the Private DICT entries (Subrs, DefaultWidthX, NominalWidthX). Glyphs are then parsed and registered under the supplied encoding or, when encoding is null, under one built from the encoding table read from the font. font_names is filled but not read again in this function. */ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr encoding) +{ + Reader reader(cff_bytes); + + // Header + // skip major, minor version + reader.consume(2); + auto header_size = TRY(reader.try_read()); + // skip offset size + reader.consume(1); + reader.move_to(header_size); + + // Name INDEX + Vector font_names; + TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr { + auto string = TRY(String::from_utf8(data)); + return TRY(font_names.try_append(string)); + })); + + auto cff = adopt_ref(*new CFF()); + cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f }); + + // Top DICT INDEX + int charset_offset = 0; + Vector encoding_codes; + auto charstrings_offset = 0; + Vector subroutines; + int defaultWidthX = 0; + int nominalWidthX = 0; + TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) { + Reader element_reader { element_data }; + return parse_dict(element_reader, [&](TopDictOperator op, Vector const& operands) -> PDFErrorOr { + switch (op) { + case TopDictOperator::Encoding: { + /* NOTE(review): the operand is always used as a byte offset into cff_bytes, including the default of 0; the CFF spec reserves small Encoding values for predefined encodings - confirm. */ auto encoding_offset = 0; + if (!operands.is_empty()) + encoding_offset = operands[0].get(); + encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset)))); 
break; + } + case TopDictOperator::Charset: { + /* NOTE(review): charset_offset keeps its default of 0 when no operand is given and is later used as a byte offset; the CFF spec reserves small Charset values for predefined charsets - confirm. */ if (!operands.is_empty()) + charset_offset = operands[0].get(); + break; + } + case TopDictOperator::CharStrings: { + if (!operands.is_empty()) + charstrings_offset = operands[0].get(); + break; + } + case TopDictOperator::Private: { + /* Operand 0 is used as the Private DICT size and operand 1 as its offset into cff_bytes. NOTE(review): both are read without checking how many operands were supplied - confirm how a short operand list is handled. */ auto private_dict_size = operands[0].get(); + auto private_dict_offset = operands[1].get(); + Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) }; + TRY(parse_dict(priv_dict_reader, [&](PrivDictOperator op, Vector const& operands) -> PDFErrorOr { + switch (op) { + case PrivDictOperator::Subrs: { + auto subrs_offset = operands[0].get(); + /* The Subrs offset is applied relative to the start of the Private DICT. */ Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) }; + dbgln("Parsing Subrs INDEX"); + TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr { + return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes)))); + })); + break; + } + case PrivDictOperator::DefaultWidthX: + defaultWidthX = operands[0].get(); + break; + case PrivDictOperator::NominalWidthX: + nominalWidthX = operands[0].get(); + break; + } + return {}; + })); + break; + } + default:; + } + return {}; + }); + })); + + // Create glyphs (now that we have the subroutines) and associate missing information to store them and their encoding + auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines)); + auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size())); + + // Adjust glyphs' widths as they are deltas from nominalWidthX + for (auto& glyph : glyphs) { + if (!glyph.width_specified) + glyph.width = float(defaultWidthX); + else + glyph.width += float(nominalWidthX); + } + + // Encoding given or read + if (encoding) { + for (size_t i = 0; i < glyphs.size(); i++) { + if (i == 0) { + TRY(cff->add_glyph(0, move(glyphs[0]))); + continue; + } + /* Glyph 0 is registered separately above, so glyph i takes its name from charset[i - 1]. NOTE(review): parse_charset returns an empty list for formats other than 0 and 1 - confirm this index cannot go out of range. */ auto const& name = charset[i - 1]; + u16 code = encoding->get_char_code(name); 
TRY(cff->add_glyph(code, move(glyphs[i]))); + } + cff->set_encoding(move(encoding)); + } else { + HashMap descriptors; + for (size_t i = 0; i < glyphs.size(); i++) { + if (i == 0) { + TRY(cff->add_glyph(0, move(glyphs[0]))); + descriptors.set(0, CharDescriptor { ".notdef", 0 }); + continue; + } + /* NOTE(review): encoding_codes comes from the encoding table parsed above and may hold fewer than glyphs.size() - 1 entries (it stays empty if no Encoding operator was seen) - confirm this index cannot go out of range. */ auto code = encoding_codes[i - 1]; + auto char_name = charset[i - 1]; + TRY(cff->add_glyph(code, move(glyphs[i]))); + descriptors.set(code, CharDescriptor { char_name, code }); + } + cff->set_encoding(TRY(Encoding::create(descriptors))); + } + + return cff; +} + +/* Glyph names for the string identifiers (SIDs) that parse_charset can resolve. Only the SIDs listed here are known; parse_charset substitutes the name space for any SID that is missing from this table. */ HashMap CFF::builtin_names { + { 0, ".notdef" }, + { 1, "space" }, + { 9, "parenleft" }, + { 10, "parenright" }, + { 13, "comma" }, + { 14, "hyphen" }, + { 15, "period" }, + + { 17, "zero" }, + { 18, "one" }, + { 19, "two" }, + { 20, "three" }, + { 21, "four" }, + { 22, "five" }, + { 23, "six" }, + { 24, "seven" }, + { 25, "eight" }, + { 26, "nine" }, + { 27, "colon" }, + { 28, "semicolon" }, + + { 34, "A" }, + { 35, "B" }, + { 36, "C" }, + { 37, "D" }, + { 38, "E" }, + { 39, "F" }, + { 40, "G" }, + { 41, "H" }, + { 42, "I" }, + { 43, "J" }, + { 44, "K" }, + { 45, "L" }, + { 46, "M" }, + { 47, "N" }, + { 48, "O" }, + { 49, "P" }, + { 50, "Q" }, + { 51, "R" }, + { 52, "S" }, + { 53, "T" }, + { 54, "U" }, + { 55, "V" }, + { 56, "W" }, + { 57, "X" }, + { 58, "Y" }, + { 59, "Z" }, + { 66, "a" }, + { 67, "b" }, + { 68, "c" }, + { 69, "d" }, + { 70, "e" }, + { 71, "f" }, + { 72, "g" }, + { 73, "h" }, + { 74, "i" }, + { 75, "j" }, + { 76, "k" }, + { 77, "l" }, + { 78, "m" }, + { 79, "n" }, + { 80, "o" }, + { 81, "p" }, + { 82, "q" }, + { 83, "r" }, + { 84, "s" }, + { 85, "t" }, + { 86, "u" }, + { 87, "v" }, + { 88, "w" }, + { 89, "x" }, + { 90, "y" }, + { 91, "z" }, + + { 104, "quotesingle" }, + { 105, "quotedblleft" }, + + { 111, "endash" }, + + { 116, "bullet" }, + + { 119, "quotedblright" }, + + { 137, "emdash" }, + + { 170, "copyright" }, +}; + +/* Reads a charset table and returns the glyph names for every glyph except glyph 0. Format 0 reads glyph_count - 1 SIDs; format 1 reads (first SID, count of additional SIDs) ranges until at least glyph_count - 1 names have been collected. Any other format value returns an empty list without an error. SIDs are resolved through builtin_names only. */ PDFErrorOr> CFF::parse_charset(Reader&& reader, size_t glyph_count) +{ + 
Vector names; + auto resolve = [](SID sid) { + auto x = builtin_names.find(sid); + if (x == builtin_names.end()) { + dbgln("Cound't find string for SID {}, going with space", sid); + return DeprecatedFlyString("space"); + } + return x->value; + }; + + auto format = TRY(reader.try_read()); + if (format == 0) { + /* NOTE(review): i is a u8 compared against a size_t bound, so it wraps before reaching any bound above 255; glyph_count - 1 also wraps around when glyph_count is 0. In both cases the loop would presumably only end when a read fails - confirm. */ for (u8 i = 0; i < glyph_count - 1; i++) { + SID sid = TRY(reader.try_read>()); + TRY(names.try_append(resolve(sid))); + } + } else if (format == 1) { + while (names.size() < glyph_count - 1) { + auto first_sid = TRY(reader.try_read>()); + int left = TRY(reader.try_read()); + /* NOTE(review): sid is declared u8 while SID is declared u16 in CFF.h, so a first_sid above 255 would be truncated here - confirm. */ for (u8 sid = first_sid; left >= 0; left--, sid++) + TRY(names.try_append(resolve(sid))); + } + } + return names; +} + +/* Parses a CharStrings INDEX: each entry is turned into a glyph by parse_glyph, using the given subroutines and a fresh GlyphParserState per entry, and the glyphs are returned in INDEX order. */ PDFErrorOr> CFF::parse_charstrings(Reader&& reader, Vector const& subroutines) +{ + Vector glyphs; + TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr { + GlyphParserState state; + auto glyph = TRY(parse_glyph(charstring_data, subroutines, state, true)); + return TRY(glyphs.try_append(glyph)); + })); + return glyphs; +} + +/* Reads an encoding table and returns its character codes in order. Format 0 is a count followed by that many codes; format 1 is a count of ranges, each a first code plus a number of additional consecutive codes. Any other format value is returned as an error. NOTE(review): the CFF spec appears to use the high bit of the format byte to flag supplemental encoding data; such tables would be rejected here - confirm. */ PDFErrorOr> CFF::parse_encoding(Reader&& reader) +{ + Vector encoding_codes; + auto format = TRY(reader.try_read()); + if (format == 0) { + auto n_codes = TRY(reader.try_read()); + for (u8 i = 0; i < n_codes; i++) { + TRY(encoding_codes.try_append(TRY(reader.try_read()))); + } + } else if (format == 1) { + auto n_ranges = TRY(reader.try_read()); + for (u8 i = 0; i < n_ranges; i++) { + auto first_code = TRY(reader.try_read()); + int left = TRY(reader.try_read()); + for (u8 code = first_code; left >= 0; left--, code++) + TRY(encoding_codes.try_append(code)); + } + } else + return error(DeprecatedString::formatted("Invalid encoding format: {}", format)); + return encoding_codes; +} + +/* Walks a DICT byte stream until the reader is exhausted. A byte of 21 or less is an operator: it is decoded by parse_dict_operator and handler is called with the operands accumulated so far, which are then cleared. Any other byte starts an operand, decoded by load_dict_operand and appended to the pending operands. */ template +PDFErrorOr CFF::parse_dict(Reader& reader, DictEntryHandler&& handler) +{ + Vector operands; + while (reader.remaining() > 0) { + auto b0 = reader.read(); + // A command + if (b0 <= 21) { + auto op = TRY(parse_dict_operator(b0, 
reader)); + TRY(handler(op, operands)); + operands.clear(); + continue; + } + // An operand + TRY(operands.try_append(TRY(load_dict_operand(b0, reader)))); + } + return {}; +} + +template PDFErrorOr CFF::parse_dict(Reader&, DictEntryHandler&&); +template PDFErrorOr CFF::parse_dict(Reader&, DictEntryHandler&&); + +/* Converts operator byte b0 (asserted to be 21 or less) into an OperatorT. Byte 12 is an escape: one more byte b1 is read and the operator value is (12 << 8 | b1). */ template +PDFErrorOr CFF::parse_dict_operator(u8 b0, Reader& reader) +{ + VERIFY(b0 <= 21); + if (b0 != 12) + return OperatorT { (int)b0 }; + auto b1 = TRY(reader.try_read()); + return OperatorT { b0 << 8 | b1 }; +} + +template PDFErrorOr CFF::parse_dict_operator(u8, Reader&); + +/* Reads an INDEX header (entry count, then offset size) and hands the entries to parse_index_data for offset sizes 1, 2 and 4. An INDEX with a count of 0 returns immediately without reading an offset size. NOTE(review): any other offset size, including 3, reaches VERIFY_NOT_REACHED() - confirm whether such input should return an error instead. */ PDFErrorOr CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler) +{ + Card16 count = TRY(reader.try_read>()); + if (count == 0) + return {}; + auto offset_size = TRY(reader.try_read()); + if (offset_size == 1) + return parse_index_data(count, reader, data_handler); + if (offset_size == 2) + return parse_index_data(count, reader, data_handler); + if (offset_size == 4) + return parse_index_data(count, reader, data_handler); + VERIFY_NOT_REACHED(); +} + +/* Iterates the count entries of an INDEX whose offsets have the width of OffsetType. For entry i the reader position is saved, offsets i and i + 1 are read from the offset array, the bytes between them are passed to handler, and the position is restored. Offsets are applied relative to offset_refpoint, the byte just before the data area. On return the reader is positioned at offset_refpoint plus the last end offset. NOTE(review): the offsets are read with read() and are not validated before slicing - confirm how out-of-range values are handled. */ template +PDFErrorOr CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler) +{ + OffsetType last_data_end = 1; + auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1; + for (u16 i = 0; i < count; i++) { + reader.save(); + reader.move_by(sizeof(OffsetType) * i); + OffsetType data_start = reader.read>(); + last_data_end = reader.read>(); + auto data_size = last_data_end - data_start; + reader.move_to(offset_refpoint + data_start); + TRY(handler(reader.bytes().slice(reader.offset(), data_size))); + reader.load(); + } + reader.move_to(offset_refpoint + last_data_end); + return {}; +} + +template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); +template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); +template PDFErrorOr CFF::parse_index_data(Card16, Reader&, IndexDataHandler&); + +// 4 DICT DATA, Table 3 Operand Encoding +/* Decodes an integer DICT operand whose first byte is b0: 32..246 is a one-byte value (b0 - 139), 247..250 and 251..254 are two-byte positive and negative values, 28 is followed by two bytes and 29 by four bytes. Any other b0 reaches VERIFY_NOT_REACHED(). NOTE(review): the b0 == 28 result is assembled as b1 << 8 | b2 with no visible sign extension - confirm against the signed 16-bit range in the spec. */ int 
CFF::load_int_dict_operand(u8 b0, Reader& reader) +{ + if (b0 >= 32 && b0 <= 246) { + return b0 - 139; + } + if (b0 >= 247 && b0 <= 250) { + auto b1 = reader.read(); + return (b0 - 247) * 256 + b1 + 108; + } + if (b0 >= 251 && b0 <= 254) { + auto b1 = reader.read(); + return -(b0 - 251) * 256 - b1 - 108; + } + if (b0 == 28) { + auto b1 = reader.read(); + auto b2 = reader.read(); + return b1 << 8 | b2; + } + if (b0 == 29) { + auto b1 = reader.read(); + auto b2 = reader.read(); + auto b3 = reader.read(); + auto b4 = reader.read(); + return b1 << 24 | b2 << 16 | b3 << 8 | b4; + } + VERIFY_NOT_REACHED(); +} + +/* Decodes a real-number DICT operand stored as packed nibbles, high nibble first: 0-9 are digits, 0xa is a decimal point, 0xb is E, 0xc is E followed by a minus sign, 0xe is a minus sign and 0xf ends the number; 0xd is ignored. The collected text is converted to a floating point value. NOTE(review): release_value() is called without checking that the conversion produced a value - confirm behaviour on malformed input. */ float CFF::load_float_dict_operand(Reader& reader) +{ + StringBuilder sb; + auto add_nibble = [&](char nibble) { + if (nibble < 0xa) + sb.append('0' + nibble); + else if (nibble == 0xa) + sb.append('.'); + else if (nibble == 0xb) + sb.append('E'); + else if (nibble == 0xc) + sb.append("E-"sv); + else if (nibble == 0xe) + sb.append('-'); + }; + while (true) { + auto byte = reader.read(); + char nibble1 = (byte & 0xf0) >> 4; + char nibble2 = byte & 0x0f; + if (nibble1 == 0xf) + break; + add_nibble(nibble1); + if (nibble2 == 0xf) + break; + add_nibble(nibble2); + } + auto result = AK::StringUtils::convert_to_floating_point(sb.string_view()); + return result.release_value(); +} + +/* Decodes one DICT operand that starts with byte b0: 30 introduces a real number, any other value of 28 or more is passed to load_int_dict_operand, and smaller values produce a MalformedPDF error. NOTE(review): b0 values 31 and 255 pass the >= 28 test but match no branch in load_int_dict_operand, so they reach its VERIFY_NOT_REACHED() - confirm. */ PDFErrorOr CFF::load_dict_operand(u8 b0, Reader& reader) +{ + if (b0 == 30) + return load_float_dict_operand(reader); + if (b0 >= 28) + return load_int_dict_operand(b0, reader); + return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) }; +} +} diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.h b/Userland/Libraries/LibPDF/Fonts/CFF.h new file mode 100644 index 0000000000..7f911ae9ef --- /dev/null +++ b/Userland/Libraries/LibPDF/Fonts/CFF.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2023, Rodrigo Tobar . 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include + +namespace PDF { + +class Reader; + +/* Type1FontProgram subclass whose static create() takes raw bytes plus an optional encoding and returns a CFF instance. The other static members declared below are parsing helpers; the existing note further down marks them as intended to become private. */ class CFF : public Type1FontProgram { + +private: + // Table 9: Top DICT Operator Entries + /* Enumerator values are the numeric operator codes; the commented-out entries are not part of the enum. The (12 << 8 | n) form in the commented-out entries denotes two-byte operators. */ enum class TopDictOperator { + Version = 0, + Notice, + FullName, + FamilyName, + Weight, + FontBBox, + // UniqueID = 13, + // XUID, + Charset = 15, + Encoding, + CharStrings, + Private, + // IsFixedPitch = (12 << 8 | 1), + // ItalicAngle, + // UnderlinePosition, + // UnderlineThickness, + // PaintType, + }; + + /* Private DICT operator codes declared here: Subrs is 19, DefaultWidthX is 20 and NominalWidthX is 21. */ enum class PrivDictOperator { + Subrs = 19, + DefaultWidthX, + NominalWidthX, + }; + +public: + static PDFErrorOr> create(ReadonlyBytes const&, RefPtr encoding); + + // to private + /* Fixed-width aliases for the basic data types used by the declarations below. */ using Card8 = u8; + using Card16 = u16; + using Offset = i32; + using OffSize = u8; + using SID = u16; + /* presumably holds either the int from load_int_dict_operand or the float from load_float_dict_operand - TODO confirm the alternatives */ using DictOperand = Variant; + + static int load_int_dict_operand(u8 b0, Reader&); + static float load_float_dict_operand(Reader&); + static PDFErrorOr load_dict_operand(u8, Reader&); + + /* Callback type taken by parse_index and parse_index_data; it receives a ReadonlyBytes view. */ using IndexDataHandler = Function(ReadonlyBytes const&)>; + static PDFErrorOr parse_index(Reader& reader, IndexDataHandler&&); + + template + static PDFErrorOr parse_index_data(Card16 count, Reader& reader, IndexDataHandler&); + + /* Callback type taken by parse_dict; it receives an operator and a Vector of operands. */ template + using DictEntryHandler = Function(OperatorT, Vector const&)>; + + template + static PDFErrorOr parse_dict(Reader& reader, DictEntryHandler&& handler); + + template + static PDFErrorOr parse_dict_operator(u8, Reader&); + + static PDFErrorOr> parse_charstrings(Reader&&, Vector const& subroutines); + + static PDFErrorOr> parse_charset(Reader&&, size_t); + static PDFErrorOr> parse_encoding(Reader&&); + + /* presumably maps SID values to glyph names - TODO confirm against the definition in CFF.cpp */ static HashMap builtin_names; +}; + +}