mirror of
https://github.com/RGBCube/serenity
synced 2025-05-28 14:25:06 +00:00

The first iteration has enough SIDs to display simple documents, but when trying more and more documents we started to need more of these SIDs to be properly defined. This is a copy/paste exercise from the CFF document, which is tedious, so it will continue in small drops. This commit fills all the gaps until SID 228, which covers all the ISOAdobe space, and should be enough for most use cases. Since this is a continuous space starting at 0, we now use an Array instead of a Map to store these names, which should be more performant. Also to simplify things I've moved the Array out of the CFF class, making it a simpler static variable, which allows us to use template type deduction.
573 lines
16 KiB
C++
573 lines
16 KiB
C++
/*
|
|
* Copyright (c) 2023, Rodrigo Tobar <rtobarc@gmail.com>.
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/Endian.h>
|
|
#include <AK/String.h>
|
|
#include <LibGfx/Forward.h>
|
|
#include <LibPDF/Encoding.h>
|
|
#include <LibPDF/Error.h>
|
|
#include <LibPDF/Fonts/CFF.h>
|
|
#include <LibPDF/Reader.h>
|
|
|
|
namespace PDF {
|
|
|
|
/// Parses a CFF font program out of `cff_bytes` and builds a CFF object from it.
///
/// The data is processed in file order: Header, Name INDEX and Top DICT INDEX. The Top DICT
/// in turn points at the Encoding, Charset, CharStrings and Private DICT data, which are
/// parsed from `cff_bytes` at the offsets it provides.
///
/// If `encoding` is non-null it is installed on the font as-is; otherwise an Encoding is
/// built from the font's own encoding codes and charset names.
///
/// Returns an error (instead of asserting) when a DICT operator is missing operands, when an
/// offset points outside of `cff_bytes`, or when the charset is shorter than the glyph list.
PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr<Encoding> encoding)
{
    // The fixed part of the header read below is four bytes: major, minor, header size, offset size.
    if (cff_bytes.size() < 4)
        return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("CFF data is too short for a header: {} bytes", cff_bytes.size()) };

    // Offsets come straight from the font data, so they are validated before slicing.
    auto slice_from = [&cff_bytes](i64 offset) -> PDFErrorOr<ReadonlyBytes> {
        if (offset < 0 || static_cast<u64>(offset) > cff_bytes.size())
            return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("CFF offset {} is out of bounds", offset) };
        return cff_bytes.slice(static_cast<size_t>(offset));
    };

    Reader reader(cff_bytes);

    // Header
    // skip major, minor version
    reader.consume(2);
    auto header_size = TRY(reader.try_read<Card8>());
    // skip offset size
    reader.consume(1);
    if (header_size >= cff_bytes.size())
        return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("CFF header size {} is out of bounds", header_size) };
    reader.move_to(header_size);

    // Name INDEX
    Vector<String> font_names;
    TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr<void> {
        auto string = TRY(String::from_utf8(data));
        return TRY(font_names.try_append(string));
    }));

    auto cff = adopt_ref(*new CFF());
    cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f });

    // Top DICT INDEX
    // NOTE(review): charset and encoding offsets default to 0 and are handed to the parsers like
    // any other offset. The CFF specification reserves small offset values for predefined
    // charsets/encodings; confirm whether those need dedicated handling here.
    int charset_offset = 0;
    Vector<u8> encoding_codes;
    auto charstrings_offset = 0;
    Vector<ByteBuffer> subroutines;
    int defaultWidthX = 0;
    int nominalWidthX = 0;
    TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) {
        Reader element_reader { element_data };
        return parse_dict<TopDictOperator>(element_reader, [&](TopDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
            switch (op) {
            case TopDictOperator::Encoding: {
                auto encoding_offset = 0;
                if (!operands.is_empty())
                    encoding_offset = operands[0].get<int>();
                encoding_codes = TRY(parse_encoding(Reader(TRY(slice_from(encoding_offset)))));
                break;
            }
            case TopDictOperator::Charset: {
                if (!operands.is_empty())
                    charset_offset = operands[0].get<int>();
                break;
            }
            case TopDictOperator::CharStrings: {
                if (!operands.is_empty())
                    charstrings_offset = operands[0].get<int>();
                break;
            }
            case TopDictOperator::Private: {
                // The Private operator carries two operands: size, then offset.
                if (operands.size() < 2)
                    return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("CFF Private operator needs 2 operands, got {}", operands.size()) };
                auto private_dict_size = operands[0].get<int>();
                auto private_dict_offset = operands[1].get<int>();
                bool const private_dict_in_bounds = private_dict_size >= 0
                    && private_dict_offset >= 0
                    && static_cast<size_t>(private_dict_offset) <= cff_bytes.size()
                    && static_cast<size_t>(private_dict_size) <= cff_bytes.size() - static_cast<size_t>(private_dict_offset);
                if (!private_dict_in_bounds)
                    return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("CFF Private DICT (offset {}, size {}) is out of bounds", private_dict_offset, private_dict_size) };

                Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) };
                TRY(parse_dict<PrivDictOperator>(priv_dict_reader, [&](PrivDictOperator priv_op, Vector<DictOperand> const& priv_operands) -> PDFErrorOr<void> {
                    // Every operator handled below reads exactly one operand.
                    auto require_operand = [&]() -> PDFErrorOr<void> {
                        if (priv_operands.is_empty())
                            return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("CFF Private DICT operator {} is missing its operand", static_cast<int>(priv_op)) };
                        return {};
                    };
                    switch (priv_op) {
                    case PrivDictOperator::Subrs: {
                        TRY(require_operand());
                        auto subrs_offset = priv_operands[0].get<int>();
                        // The Subrs offset is applied on top of the Private DICT offset.
                        // Added in 64 bits so two large ints cannot overflow.
                        Reader subrs_reader { TRY(slice_from(static_cast<i64>(private_dict_offset) + subrs_offset)) };
                        dbgln("Parsing Subrs INDEX");
                        TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr<void> {
                            return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes))));
                        }));
                        break;
                    }
                    case PrivDictOperator::DefaultWidthX:
                        TRY(require_operand());
                        defaultWidthX = priv_operands[0].get<int>();
                        break;
                    case PrivDictOperator::NominalWidthX:
                        TRY(require_operand());
                        nominalWidthX = priv_operands[0].get<int>();
                        break;
                    }
                    return {};
                }));
                break;
            }
            default:;
            }
            return {};
        });
    }));

    // Create glyphs (now that we have the subroutines) and associate missing information to store them and their encoding
    auto glyphs = TRY(parse_charstrings(Reader(TRY(slice_from(charstrings_offset))), subroutines));
    auto charset = TRY(parse_charset(Reader { TRY(slice_from(charset_offset)) }, glyphs.size()));

    // Glyph i (for i >= 1) is named by charset[i - 1], so the charset must cover all of them.
    if (!glyphs.is_empty() && charset.size() < glyphs.size() - 1)
        return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("CFF charset has {} names for {} glyphs", charset.size(), glyphs.size()) };

    // Adjust glyphs' widths as they are deltas from nominalWidthX
    for (auto& glyph : glyphs) {
        if (!glyph.has_width())
            glyph.set_width(float(defaultWidthX));
        else
            glyph.set_width(glyph.width() + float(nominalWidthX));
    }

    for (size_t i = 0; i < glyphs.size(); i++) {
        if (i == 0) {
            TRY(cff->add_glyph(0, move(glyphs[0])));
            continue;
        }
        auto const& name = charset[i - 1];
        TRY(cff->add_glyph(name, move(glyphs[i])));
    }
    cff->consolidate_glyphs();

    // Encoding given or read
    if (encoding) {
        cff->set_encoding(move(encoding));
    } else {
        auto built_encoding = Encoding::create();
        for (size_t i = 0; i < glyphs.size(); i++) {
            if (i == 0) {
                built_encoding->set(0, ".notdef");
                continue;
            }
            // The encoding table may list fewer codes than there are glyphs (or be absent
            // altogether); glyphs past its end are simply left unencoded.
            if (i - 1 >= encoding_codes.size())
                break;
            auto code = encoding_codes[i - 1];
            auto char_name = charset[i - 1];
            built_encoding->set(code, char_name);
        }
        cff->set_encoding(move(built_encoding));
    }

    return cff;
}
|
|
|
|
/// Appendix C: Predefined Charsets
|
|
static constexpr Array s_cff_builtin_names {
|
|
".notdef"sv,
|
|
"space"sv,
|
|
"exclam"sv,
|
|
"quotedbl"sv,
|
|
"numbersign"sv,
|
|
"dollar"sv,
|
|
"percent"sv,
|
|
"ampersand"sv,
|
|
"quoteright"sv,
|
|
"parenleft"sv,
|
|
"parenright"sv,
|
|
"asterisk"sv,
|
|
"plus"sv,
|
|
"comma"sv,
|
|
"hyphen"sv,
|
|
"period"sv,
|
|
"slash"sv,
|
|
"zero"sv,
|
|
"one"sv,
|
|
"two"sv,
|
|
"three"sv,
|
|
"four"sv,
|
|
"five"sv,
|
|
"six"sv,
|
|
"seven"sv,
|
|
"eight"sv,
|
|
"nine"sv,
|
|
"colon"sv,
|
|
"semicolon"sv,
|
|
"less"sv,
|
|
"equal"sv,
|
|
"greater"sv,
|
|
"question"sv,
|
|
"at"sv,
|
|
"A"sv,
|
|
"B"sv,
|
|
"C"sv,
|
|
"D"sv,
|
|
"E"sv,
|
|
"F"sv,
|
|
"G"sv,
|
|
"H"sv,
|
|
"I"sv,
|
|
"J"sv,
|
|
"K"sv,
|
|
"L"sv,
|
|
"M"sv,
|
|
"N"sv,
|
|
"O"sv,
|
|
"P"sv,
|
|
"Q"sv,
|
|
"R"sv,
|
|
"S"sv,
|
|
"T"sv,
|
|
"U"sv,
|
|
"V"sv,
|
|
"W"sv,
|
|
"X"sv,
|
|
"Y"sv,
|
|
"Z"sv,
|
|
"bracketleft"sv,
|
|
"backslash"sv,
|
|
"bracketright"sv,
|
|
"asciicircum"sv,
|
|
"underscore"sv,
|
|
"quoteleft"sv,
|
|
"a"sv,
|
|
"b"sv,
|
|
"c"sv,
|
|
"d"sv,
|
|
"e"sv,
|
|
"f"sv,
|
|
"g"sv,
|
|
"h"sv,
|
|
"i"sv,
|
|
"j"sv,
|
|
"k"sv,
|
|
"l"sv,
|
|
"m"sv,
|
|
"n"sv,
|
|
"o"sv,
|
|
"p"sv,
|
|
"q"sv,
|
|
"r"sv,
|
|
"s"sv,
|
|
"t"sv,
|
|
"u"sv,
|
|
"v"sv,
|
|
"w"sv,
|
|
"x"sv,
|
|
"y"sv,
|
|
"z"sv,
|
|
"braceleft"sv,
|
|
"bar"sv,
|
|
"braceright"sv,
|
|
"asciitilde"sv,
|
|
"exclamdown"sv,
|
|
"cent"sv,
|
|
"sterling"sv,
|
|
"fraction"sv,
|
|
"yen"sv,
|
|
"florin"sv,
|
|
"section"sv,
|
|
"currency"sv,
|
|
"quotesingle"sv,
|
|
"quotedblleft"sv,
|
|
"guillemotleft"sv,
|
|
"guilsinglleft"sv,
|
|
"guilsinglright"sv,
|
|
"fi"sv,
|
|
"fl"sv,
|
|
"endash"sv,
|
|
"dagger"sv,
|
|
"daggerdbl"sv,
|
|
"periodcentered"sv,
|
|
"paragraph"sv,
|
|
"bullet"sv,
|
|
"quotesinglbase"sv,
|
|
"quotedblbase"sv,
|
|
"quotedblright"sv,
|
|
"guillemotright"sv,
|
|
"ellipsis"sv,
|
|
"perthousand"sv,
|
|
"questiondown"sv,
|
|
"grave"sv,
|
|
"acute"sv,
|
|
"circumflex"sv,
|
|
"tilde"sv,
|
|
"macron"sv,
|
|
"breve"sv,
|
|
"dotaccent"sv,
|
|
"dieresis"sv,
|
|
"ring"sv,
|
|
"cedilla"sv,
|
|
"hungarumlaut"sv,
|
|
"ogonek"sv,
|
|
"caron"sv,
|
|
"emdash"sv,
|
|
"AE"sv,
|
|
"ordfeminine"sv,
|
|
"Lslash"sv,
|
|
"Oslash"sv,
|
|
"OE"sv,
|
|
"ordmasculine"sv,
|
|
"ae"sv,
|
|
"dotlessi"sv,
|
|
"lslash"sv,
|
|
"oslash"sv,
|
|
"oe"sv,
|
|
"germandbls"sv,
|
|
"onesuperior"sv,
|
|
"logicalnot"sv,
|
|
"mu"sv,
|
|
"trademark"sv,
|
|
"Eth"sv,
|
|
"onehalf"sv,
|
|
"plusminus"sv,
|
|
"Thorn"sv,
|
|
"onequarter"sv,
|
|
"divide"sv,
|
|
"brokenbar"sv,
|
|
"degree"sv,
|
|
"thorn"sv,
|
|
"threequarters"sv,
|
|
"twosuperior"sv,
|
|
"registered"sv,
|
|
"minus"sv,
|
|
"eth"sv,
|
|
"multiply"sv,
|
|
"threesuperior"sv,
|
|
"copyright"sv,
|
|
"Aacute"sv,
|
|
"Acircumflex"sv,
|
|
"Adieresis"sv,
|
|
"Agrave"sv,
|
|
"Aring"sv,
|
|
"Atilde"sv,
|
|
"Ccedilla"sv,
|
|
"Eacute"sv,
|
|
"Ecircumflex"sv,
|
|
"Edieresis"sv,
|
|
"Egrave"sv,
|
|
"Iacute"sv,
|
|
"Icircumflex"sv,
|
|
"Idieresis"sv,
|
|
"Igrave"sv,
|
|
"Ntilde"sv,
|
|
"Oacute"sv,
|
|
"Ocircumflex"sv,
|
|
"Odieresis"sv,
|
|
"Ograve"sv,
|
|
"Otilde"sv,
|
|
"Scaron"sv,
|
|
"Uacute"sv,
|
|
"Ucircumflex"sv,
|
|
"Udieresis"sv,
|
|
"Ugrave"sv,
|
|
"Yacute"sv,
|
|
"Ydieresis"sv,
|
|
"Zcaron"sv,
|
|
"aacute"sv,
|
|
"acircumflex"sv,
|
|
"adieresis"sv,
|
|
"agrave"sv,
|
|
"aring"sv,
|
|
"atilde"sv,
|
|
"ccedilla"sv,
|
|
"eacute"sv,
|
|
"ecircumflex"sv,
|
|
"edieresis"sv,
|
|
"egrave"sv,
|
|
"iacute"sv,
|
|
"icircumflex"sv,
|
|
"idieresis"sv,
|
|
"igrave"sv,
|
|
"ntilde"sv,
|
|
"oacute"sv,
|
|
"ocircumflex"sv,
|
|
"odieresis"sv,
|
|
"ograve"sv,
|
|
"otilde"sv,
|
|
"scaron"sv,
|
|
"uacute"sv,
|
|
"ucircumflex"sv,
|
|
"udieresis"sv,
|
|
"ugrave"sv,
|
|
"yacute"sv,
|
|
"ydieresis"sv,
|
|
"zcaron"sv,
|
|
};
|
|
|
|
/// Parses the charset table located at the start of `reader`.
///
/// Returns at most `glyph_count - 1` glyph names: the caller pairs `names[i - 1]` with glyph `i`,
/// glyph 0 being handled separately. Charset formats 0, 1 and 2 are supported; any other format
/// is reported as an error when names are actually needed (`glyph_count > 1`).
///
/// SIDs outside of s_cff_builtin_names are logged and resolved to "space".
PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
{
    Vector<DeprecatedFlyString> names;
    auto resolve = [](SID sid) {
        if (sid < s_cff_builtin_names.size())
            return DeprecatedFlyString(s_cff_builtin_names[sid]);
        dbgln("Cound't find string for SID {}, going with space", sid);
        return DeprecatedFlyString("space");
    };

    auto format = TRY(reader.try_read<Card8>());

    // With zero or one glyph there is nothing to name. Bailing out here also keeps
    // `glyph_count - 1` below from wrapping around.
    if (glyph_count <= 1)
        return names;
    auto const name_count = glyph_count - 1;

    // A range stands for `first_sid` plus `left` consecutive SIDs after it. Appending stops
    // once every glyph has a name, so an overlong final range cannot overshoot.
    auto append_range = [&](SID first_sid, size_t left) -> PDFErrorOr<void> {
        SID sid = first_sid;
        for (size_t n = 0; n <= left && names.size() < name_count; n++, sid++)
            TRY(names.try_append(resolve(sid)));
        return {};
    };

    if (format == 0) {
        // One SID per glyph. The counter is a size_t: fonts can have more than 255 glyphs.
        for (size_t i = 0; i < name_count; i++) {
            SID sid = TRY(reader.try_read<BigEndian<SID>>());
            TRY(names.try_append(resolve(sid)));
        }
    } else if (format == 1) {
        // Ranges with an 8-bit "left" count.
        while (names.size() < name_count) {
            SID first_sid = TRY(reader.try_read<BigEndian<SID>>());
            size_t left = TRY(reader.try_read<Card8>());
            TRY(append_range(first_sid, left));
        }
    } else if (format == 2) {
        // Same as format 1, but with a 16-bit "left" count.
        while (names.size() < name_count) {
            SID first_sid = TRY(reader.try_read<BigEndian<SID>>());
            size_t left = TRY(reader.try_read<BigEndian<Card16>>());
            TRY(append_range(first_sid, left));
        }
    } else {
        return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Invalid charset format: {}", format) };
    }
    return names;
}
|
|
|
|
/// Walks the INDEX at the start of `reader` and parses each of its entries into a Glyph,
/// making `subroutines` available to the glyph parser. Glyphs are returned in INDEX order.
PDFErrorOr<Vector<CFF::Glyph>> CFF::parse_charstrings(Reader&& reader, Vector<ByteBuffer> const& subroutines)
{
    Vector<Glyph> parsed_glyphs;

    TRY(parse_index(reader, [&](ReadonlyBytes const& entry_bytes) -> PDFErrorOr<void> {
        // Each charstring is parsed with its own, fresh parser state.
        GlyphParserState parser_state;
        auto parsed = TRY(parse_glyph(entry_bytes, subroutines, parser_state, true));
        TRY(parsed_glyphs.try_append(parsed));
        return {};
    }));

    return parsed_glyphs;
}
|
|
|
|
/// Parses the encoding table at the start of `reader` and returns its character codes in
/// table order (the caller pairs `codes[i - 1]` with glyph `i`).
///
/// Formats 0 (explicit list of codes) and 1 (ranges of consecutive codes) are supported; any
/// other format yields an error.
///
/// The high-order bit of the format byte flags supplemental encoding data following the main
/// table. That bit is masked off so such tables are accepted; the supplemental data itself is
/// not read.
PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader)
{
    Vector<u8> encoding_codes;

    auto format_byte = TRY(reader.try_read<Card8>());
    auto format = format_byte & 0x7f;

    if (format == 0) {
        auto n_codes = TRY(reader.try_read<Card8>());
        for (u8 i = 0; i < n_codes; i++) {
            TRY(encoding_codes.try_append(TRY(reader.try_read<Card8>())));
        }
    } else if (format == 1) {
        auto n_ranges = TRY(reader.try_read<Card8>());
        for (u8 i = 0; i < n_ranges; i++) {
            auto first_code = TRY(reader.try_read<Card8>());
            // A range covers first_code plus `left` codes after it, hence the inclusive loop.
            int left = TRY(reader.try_read<Card8>());
            for (u8 code = first_code; left >= 0; left--, code++)
                TRY(encoding_codes.try_append(code));
        }
    } else
        return error(DeprecatedString::formatted("Invalid encoding format: {}", format_byte));

    return encoding_codes;
}
|
|
|
|
template<typename OperatorT>
|
|
PDFErrorOr<void> CFF::parse_dict(Reader& reader, DictEntryHandler<OperatorT>&& handler)
|
|
{
|
|
Vector<DictOperand> operands;
|
|
while (reader.remaining() > 0) {
|
|
auto b0 = reader.read<u8>();
|
|
// A command
|
|
if (b0 <= 21) {
|
|
auto op = TRY(parse_dict_operator<OperatorT>(b0, reader));
|
|
TRY(handler(op, operands));
|
|
operands.clear();
|
|
continue;
|
|
}
|
|
// An operand
|
|
TRY(operands.try_append(TRY(load_dict_operand(b0, reader))));
|
|
}
|
|
return {};
|
|
}
|
|
|
|
template PDFErrorOr<void> CFF::parse_dict<CFF::TopDictOperator>(Reader&, DictEntryHandler<TopDictOperator>&&);
|
|
template PDFErrorOr<void> CFF::parse_dict<CFF::PrivDictOperator>(Reader&, DictEntryHandler<PrivDictOperator>&&);
|
|
|
|
template<typename OperatorT>
|
|
PDFErrorOr<OperatorT> CFF::parse_dict_operator(u8 b0, Reader& reader)
|
|
{
|
|
VERIFY(b0 <= 21);
|
|
if (b0 != 12)
|
|
return OperatorT { (int)b0 };
|
|
auto b1 = TRY(reader.try_read<u8>());
|
|
return OperatorT { b0 << 8 | b1 };
|
|
}
|
|
|
|
template PDFErrorOr<CFF::TopDictOperator> CFF::parse_dict_operator(u8, Reader&);
|
|
|
|
/// Parses an INDEX structure at the reader's current position, calling `data_handler` once per
/// entry with that entry's bytes.
///
/// An INDEX starts with a 16-bit entry count; an empty INDEX consists of the count alone. For
/// a non-empty one, the following byte gives the width of the entries in its offset array, and
/// the rest of the work is delegated to parse_index_data for that width.
///
/// Offset widths of 1, 2 and 4 bytes are supported. Any other width comes from font data and is
/// reported as an error rather than asserted on.
PDFErrorOr<void> CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler)
{
    Card16 count = TRY(reader.try_read<BigEndian<Card16>>());
    if (count == 0)
        return {};

    auto offset_size = TRY(reader.try_read<OffSize>());
    switch (offset_size) {
    case 1:
        return parse_index_data<u8>(count, reader, data_handler);
    case 2:
        return parse_index_data<u16>(count, reader, data_handler);
    case 4:
        return parse_index_data<u32>(count, reader, data_handler);
    default:
        return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unsupported CFF INDEX offset size: {}", offset_size) };
    }
}
|
|
|
|
template<typename OffsetType>
|
|
PDFErrorOr<void> CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler)
|
|
{
|
|
OffsetType last_data_end = 1;
|
|
auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1;
|
|
for (u16 i = 0; i < count; i++) {
|
|
reader.save();
|
|
reader.move_by(sizeof(OffsetType) * i);
|
|
OffsetType data_start = reader.read<BigEndian<OffsetType>>();
|
|
last_data_end = reader.read<BigEndian<OffsetType>>();
|
|
auto data_size = last_data_end - data_start;
|
|
reader.move_to(offset_refpoint + data_start);
|
|
TRY(handler(reader.bytes().slice(reader.offset(), data_size)));
|
|
reader.load();
|
|
}
|
|
reader.move_to(offset_refpoint + last_data_end);
|
|
return {};
|
|
}
|
|
|
|
template PDFErrorOr<void> CFF::parse_index_data<u8>(Card16, Reader&, IndexDataHandler&);
|
|
template PDFErrorOr<void> CFF::parse_index_data<u16>(Card16, Reader&, IndexDataHandler&);
|
|
template PDFErrorOr<void> CFF::parse_index_data<u32>(Card16, Reader&, IndexDataHandler&);
|
|
|
|
// 4 DICT DATA, Table 3 Operand Encoding
|
|
int CFF::load_int_dict_operand(u8 b0, Reader& reader)
|
|
{
|
|
if (b0 >= 32 && b0 <= 246) {
|
|
return b0 - 139;
|
|
}
|
|
if (b0 >= 247 && b0 <= 250) {
|
|
auto b1 = reader.read<u8>();
|
|
return (b0 - 247) * 256 + b1 + 108;
|
|
}
|
|
if (b0 >= 251 && b0 <= 254) {
|
|
auto b1 = reader.read<u8>();
|
|
return -(b0 - 251) * 256 - b1 - 108;
|
|
}
|
|
if (b0 == 28) {
|
|
auto b1 = reader.read<u8>();
|
|
auto b2 = reader.read<u8>();
|
|
return b1 << 8 | b2;
|
|
}
|
|
if (b0 == 29) {
|
|
auto b1 = reader.read<u8>();
|
|
auto b2 = reader.read<u8>();
|
|
auto b3 = reader.read<u8>();
|
|
auto b4 = reader.read<u8>();
|
|
return b1 << 24 | b2 << 16 | b3 << 8 | b4;
|
|
}
|
|
VERIFY_NOT_REACHED();
|
|
}
|
|
|
|
/// Decodes a real-number DICT operand from `reader`.
///
/// The number is stored as a sequence of 4-bit nibbles, two per byte, high nibble first:
/// 0-9 are digits, 0xa is '.', 0xb is 'E', 0xc is 'E-', 0xe is '-', and 0xf ends the number.
/// Any other nibble is skipped. The nibbles are assembled into text which is then converted to
/// a float.
///
/// Reading also stops when `reader` runs out of data. If the assembled text is not a valid
/// number, this is logged and 0 is returned instead of asserting.
float CFF::load_float_dict_operand(Reader& reader)
{
    StringBuilder sb;
    auto add_nibble = [&](char nibble) {
        if (nibble < 0xa)
            sb.append('0' + nibble);
        else if (nibble == 0xa)
            sb.append('.');
        else if (nibble == 0xb)
            sb.append('E');
        else if (nibble == 0xc)
            sb.append("E-"sv);
        else if (nibble == 0xe)
            sb.append('-');
    };

    // An unterminated number at the very end of the data must not read past it.
    while (reader.remaining() > 0) {
        auto byte = reader.read<u8>();
        char nibble1 = (byte & 0xf0) >> 4;
        char nibble2 = byte & 0x0f;
        if (nibble1 == 0xf)
            break;
        add_nibble(nibble1);
        if (nibble2 == 0xf)
            break;
        add_nibble(nibble2);
    }

    auto result = AK::StringUtils::convert_to_floating_point<float>(sb.string_view());
    if (!result.has_value()) {
        dbgln("CFF: couldn't parse real number operand '{}', going with 0", sb.string_view());
        return 0.0f;
    }
    return result.release_value();
}
|
|
|
|
/// Decodes one DICT operand whose first byte `b0` has already been read.
///
/// 30 introduces a real number; 28, 29 and 32..254 introduce integers. Everything else is
/// rejected with an error. That includes 31 and 255, which load_int_dict_operand does not
/// handle and would assert on.
PDFErrorOr<CFF::DictOperand> CFF::load_dict_operand(u8 b0, Reader& reader)
{
    if (b0 == 30)
        return load_float_dict_operand(reader);

    bool const is_int_prefix = b0 >= 28 && b0 != 31 && b0 != 255;
    if (is_int_prefix)
        return load_int_dict_operand(b0, reader);

    return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) };
}
|
|
}
|