diff --git a/Userland/Libraries/LibPDF/Encoding.cpp b/Userland/Libraries/LibPDF/Encoding.cpp index 9c951da309..c21767afa3 100644 --- a/Userland/Libraries/LibPDF/Encoding.cpp +++ b/Userland/Libraries/LibPDF/Encoding.cpp @@ -11,15 +11,9 @@ namespace PDF { -PDFErrorOr> Encoding::create(HashMap descriptors) +NonnullRefPtr Encoding::create() { - auto encoding = adopt_ref(*new Encoding()); - encoding->m_descriptors = descriptors; - - for (auto& descriptor : descriptors) - encoding->m_name_mapping.set(descriptor.value.name, descriptor.value.code_point); - - return encoding; + return adopt_ref(*new Encoding()); } PDFErrorOr> Encoding::from_object(Document* document, NonnullRefPtr const& obj) @@ -49,8 +43,8 @@ PDFErrorOr> Encoding::from_object(Document* document, No auto encoding = adopt_ref(*new Encoding()); - encoding->m_descriptors = base_encoding->descriptors(); - encoding->m_name_mapping = base_encoding->name_mapping(); + encoding->m_descriptors = base_encoding->m_descriptors; + encoding->m_name_mapping = base_encoding->m_name_mapping; auto differences_array = TRY(dict->get_array(document, CommonNames::Differences)); @@ -66,8 +60,7 @@ PDFErrorOr> Encoding::from_object(Document* document, No VERIFY(!first); auto& object = item.get>(); auto name = object->cast()->name(); - - encoding->m_descriptors.set(current_code_point, { name, base_encoding->m_name_mapping.ensure(name) }); + encoding->set(current_code_point, name); current_code_point++; } } @@ -75,13 +68,18 @@ PDFErrorOr> Encoding::from_object(Document* document, No return encoding; } +void Encoding::set(CharCodeType char_code, DeprecatedFlyString const& glyph_name) +{ + m_descriptors.set(char_code, glyph_name); + m_name_mapping.set(glyph_name, char_code); +} + NonnullRefPtr Encoding::standard_encoding() { static NonnullRefPtr encoding = adopt_ref(*new Encoding()); if (encoding->m_descriptors.is_empty()) { #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \ - encoding->m_descriptors.set(standard_code, { #name, 0 }); \ - encoding->m_name_mapping.set(#name, standard_code); + encoding->set(standard_code, #name); ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) #undef ENUMERATE } @@ -94,8 +92,7 @@ NonnullRefPtr Encoding::mac_encoding() static NonnullRefPtr encoding = adopt_ref(*new Encoding()); if (encoding->m_descriptors.is_empty()) { #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \ - encoding->m_descriptors.set(mac_code, { #name, 0 }); \ - encoding->m_name_mapping.set(#name, mac_code); + encoding->set(mac_code, #name); ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) #undef ENUMERATE } @@ -108,13 +105,20 @@ NonnullRefPtr Encoding::windows_encoding() static NonnullRefPtr encoding = adopt_ref(*new Encoding()); if (encoding->m_descriptors.is_empty()) { #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \ - encoding->m_descriptors.set(win_code, { #name, 0 }); \ - encoding->m_name_mapping.set(#name, win_code); + encoding->set(win_code, #name); ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) #undef ENUMERATE - encoding->m_windows = true; - } + // PDF Annex D table D.2, note 3: + // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only + // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment. + // + // Since CharCodeType is u8 *and* we need to include 255, we iterate in reverse order to have more readable code. + for (CharCodeType char_code = 255; char_code > 040; char_code--) { + if (!encoding->m_descriptors.contains(char_code)) + encoding->set(char_code, "bullet"); + } + } return encoding; } @@ -123,8 +127,7 @@ NonnullRefPtr Encoding::pdf_doc_encoding() static NonnullRefPtr encoding = adopt_ref(*new Encoding()); if (encoding->m_descriptors.is_empty()) { #define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \ - encoding->m_descriptors.set(pdf_code, { #name, 0 }); \ - encoding->m_name_mapping.set(#name, pdf_code); + encoding->set(pdf_code, #name); ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) #undef ENUMERATE } @@ -136,9 +139,8 @@ NonnullRefPtr Encoding::symbol_encoding() { static NonnullRefPtr encoding = adopt_ref(*new Encoding()); if (encoding->m_descriptors.is_empty()) { -#define ENUMERATE(name, code) \ - encoding->m_descriptors.set(code, { #name, 0 }); \ - encoding->m_name_mapping.set(#name, code); +#define ENUMERATE(name, code) \ + encoding->set(code, #name); ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE) #undef ENUMERATE } @@ -150,21 +152,14 @@ NonnullRefPtr Encoding::zapf_encoding() { static NonnullRefPtr encoding = adopt_ref(*new Encoding()); if (encoding->m_descriptors.is_empty()) { -#define ENUMERATE(name, code) \ - encoding->m_descriptors.set(code, { #name, 0 }); \ - encoding->m_name_mapping.set(#name, code); +#define ENUMERATE(name, code) \ + encoding->set(code, #name); ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE) #undef ENUMERATE } - return encoding; } -CharDescriptor const& Encoding::get_char_code_descriptor(u16 char_code) const -{ - return const_cast(this)->m_descriptors.ensure(char_code); -} - u16 Encoding::get_char_code(DeprecatedString const& name) const { auto code_iterator = m_name_mapping.find(name); @@ -173,12 +168,4 @@ u16 Encoding::get_char_code(DeprecatedString const& name) const return 0; } -bool Encoding::should_map_to_bullet(u16 char_code) const -{ - // PDF Annex D table D.2, note 3: - // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only - // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment. - return m_windows && char_code > 040 && !m_descriptors.contains(char_code); -} - } diff --git a/Userland/Libraries/LibPDF/Encoding.h b/Userland/Libraries/LibPDF/Encoding.h index c2f6af07d8..d25a52b82c 100644 --- a/Userland/Libraries/LibPDF/Encoding.h +++ b/Userland/Libraries/LibPDF/Encoding.h @@ -625,14 +625,10 @@ namespace PDF { -struct CharDescriptor { - DeprecatedString name; - u32 code_point; -}; - class Encoding : public RefCounted { public: - static PDFErrorOr> create(HashMap descriptors); + using CharCodeType = u8; + static NonnullRefPtr create(); static PDFErrorOr> from_object(Document*, NonnullRefPtr const&); static NonnullRefPtr standard_encoding(); @@ -642,17 +638,14 @@ public: static NonnullRefPtr symbol_encoding(); static NonnullRefPtr zapf_encoding(); - HashMap const& descriptors() const { return m_descriptors; } - HashMap const& name_mapping() const { return m_name_mapping; } + HashMap const& name_mapping() const { return m_name_mapping; } u16 get_char_code(DeprecatedString const&) const; - CharDescriptor const& get_char_code_descriptor(u16 char_code) const; - - bool should_map_to_bullet(u16 char_code) const; + void set(CharCodeType char_code, DeprecatedFlyString const& glyph_name); protected: - HashMap m_descriptors; - HashMap m_name_mapping; + HashMap m_descriptors; + HashMap m_name_mapping; bool m_windows { false }; }; diff --git a/Userland/Libraries/LibPDF/Fonts/CFF.cpp b/Userland/Libraries/LibPDF/Fonts/CFF.cpp index 18e2290787..2aad6a1b07 100644 --- a/Userland/Libraries/LibPDF/Fonts/CFF.cpp +++ b/Userland/Libraries/LibPDF/Fonts/CFF.cpp @@ -121,19 +121,19 @@ PDFErrorOr> CFF::create(ReadonlyBytes const& cff_bytes, RefPt } cff->set_encoding(move(encoding)); } else { - HashMap descriptors; + auto encoding = Encoding::create(); for (size_t i = 0; i < glyphs.size(); i++) { if (i == 0) { TRY(cff->add_glyph(0, move(glyphs[0]))); - descriptors.set(0, CharDescriptor { ".notdef", 0 }); + encoding->set(0, ".notdef"); continue; } auto code = encoding_codes[i - 1]; auto char_name = charset[i - 1]; TRY(cff->add_glyph(code, move(glyphs[i]))); - descriptors.set(code, CharDescriptor { char_name, code }); + encoding->set(code, char_name); } - cff->set_encoding(TRY(Encoding::create(descriptors))); + cff->set_encoding(move(encoding)); } return cff; diff --git a/Userland/Libraries/LibPDF/Fonts/PS1FontProgram.cpp b/Userland/Libraries/LibPDF/Fonts/PS1FontProgram.cpp index 99e70ba799..534ccdd77a 100644 --- a/Userland/Libraries/LibPDF/Fonts/PS1FontProgram.cpp +++ b/Userland/Libraries/LibPDF/Fonts/PS1FontProgram.cpp @@ -36,19 +36,18 @@ PDFErrorOr> PS1FontProgram::create(ReadonlyBytes if (TRY(parse_word(reader)) == "StandardEncoding") { font_program->set_encoding(Encoding::standard_encoding()); } else { - HashMap descriptors; - + auto encoding = Encoding::create(); while (reader.remaining()) { auto word = TRY(parse_word(reader)); if (word == "readonly") { break; } else if (word == "dup") { - u32 char_code = TRY(parse_int(reader)); + u8 char_code = TRY(parse_int(reader)); auto name = TRY(parse_word(reader)); - descriptors.set(char_code, { name.starts_with('/') ? name.substring_view(1) : name.view(), char_code }); + encoding->set(char_code, name.starts_with('/') ? name.substring_view(1) : name.view()); } } - font_program->set_encoding(TRY(Encoding::create(descriptors))); + font_program->set_encoding(move(encoding)); } }