From 675237180f3e67e3f7e83640edd98aa70543d507 Mon Sep 17 00:00:00 2001
From: Srimanta Barua
Date: Thu, 4 Jun 2020 17:37:08 +0530
Subject: [PATCH] LibGfx: Get glyph ID for codepoint, from the CMAP table.

---
Review notes (this section is ignored when the patch is applied):
 * hmtx: index the longHorMetric records with their 4-byte stride and read
   the left side bearing of trailing glyphs from the bearing-only array.
 * cmap format 4: check the size before reading segCountX2, do not apply
   idDelta to a glyphIdArray entry of 0, and return 0 instead of asserting
   when the computed glyph offset lies outside the subtable.
 * cmap format 12: check the size before reading numGroups, make the size
   check overflow-safe, and return 0 instead of asserting.
 * cmap platform: accept the deprecated platform ID 2 (ISO).
 * Font: prefer the Windows "Unicode full repertoire" subtable over
   "Unicode BMP" regardless of the order of the encoding records.
 * Restored template arguments that had been stripped from the patch text.
   The targets of the #include context lines could not be recovered and
   are left as found, so those lines need fixing up before applying.

 Libraries/LibGfx/TTFont.cpp | 267 +++++++++++++++++++++++++++++++++---
 Libraries/LibGfx/TTFont.h   | 157 ++++++++++++++++++---
 2 files changed, 384 insertions(+), 40 deletions(-)

diff --git a/Libraries/LibGfx/TTFont.cpp b/Libraries/LibGfx/TTFont.cpp
index 02daf33fab..8aab1fbd1d 100644
--- a/Libraries/LibGfx/TTFont.cpp
+++ b/Libraries/LibGfx/TTFont.cpp
@@ -30,6 +30,7 @@
 #include

 namespace Gfx {
+namespace TTF {

 static u16 be_u16(const u8* ptr)
 {
@@ -51,50 +52,226 @@ static u32 tag_from_str(const char *str)
     return be_u32((const u8*) str);
 }

-u16 TTFHead::units_per_em() const
+u16 Head::units_per_em() const
 {
     return be_u16(m_slice.offset_pointer(18));
 }

-i16 TTFHead::xmin() const
+i16 Head::xmin() const
 {
     return be_i16(m_slice.offset_pointer(36));
 }

-i16 TTFHead::ymin() const
+i16 Head::ymin() const
 {
     return be_i16(m_slice.offset_pointer(38));
 }

-i16 TTFHead::xmax() const
+i16 Head::xmax() const
 {
     return be_i16(m_slice.offset_pointer(40));
 }

-i16 TTFHead::ymax() const
+i16 Head::ymax() const
 {
     return be_i16(m_slice.offset_pointer(42));
 }

-u16 TTFHead::lowest_recommended_ppem() const
+u16 Head::lowest_recommended_ppem() const
 {
     return be_u16(m_slice.offset_pointer(46));
 }

-Result<TTFIndexToLocFormat, i16> TTFHead::index_to_loc_format() const
+Result<IndexToLocFormat, i16> Head::index_to_loc_format() const
 {
     i16 raw = be_i16(m_slice.offset_pointer(50));
     switch (raw) {
     case 0:
-        return TTFIndexToLocFormat::Offset16;
+        return IndexToLocFormat::Offset16;
     case 1:
-        return TTFIndexToLocFormat::Offset32;
+        return IndexToLocFormat::Offset32;
     default:
         return raw;
     }
 }

-OwnPtr<TTFont> TTFont::load_from_file(const StringView& path, unsigned index)
+u16 Hhea::number_of_h_metrics() const
+{
+    return be_u16(m_slice.offset_pointer(34));
+}
+
+u16 Maxp::num_glyphs() const
+{
+    return be_u16(m_slice.offset_pointer(4));
+}
+
+// Looks up the advance width and left side bearing of a glyph in 'hmtx'.
+// The table holds number_of_h_metrics 4-byte records (u16 advance width,
+// i16 left side bearing) followed by one i16 left side bearing for every
+// remaining glyph; those glyphs reuse the advance width of the last record.
+GlyphHorizontalMetrics Hmtx::get_glyph_horizontal_metrics(u32 glyph_id) const
+{
+    ASSERT(glyph_id < m_num_glyphs);
+    if (glyph_id < m_number_of_h_metrics) {
+        auto record_offset = glyph_id * 4;
+        u16 advance_width = be_u16(m_slice.offset_pointer(record_offset));
+        i16 left_side_bearing = be_i16(m_slice.offset_pointer(record_offset + 2));
+        return GlyphHorizontalMetrics {
+            .advance_width = advance_width,
+            .left_side_bearing = left_side_bearing,
+        };
+    }
+    // The shared advance width comes from the last full record, so one must exist.
+    ASSERT(m_number_of_h_metrics > 0);
+    u16 advance_width = be_u16(m_slice.offset_pointer((m_number_of_h_metrics - 1) * 4));
+    auto bearing_offset = m_number_of_h_metrics * 4 + (glyph_id - m_number_of_h_metrics) * 2;
+    i16 left_side_bearing = be_i16(m_slice.offset_pointer(bearing_offset));
+    return GlyphHorizontalMetrics {
+        .advance_width = advance_width,
+        .left_side_bearing = left_side_bearing,
+    };
+}
+
+CmapSubtablePlatform CmapSubtable::platform_id() const
+{
+    switch (m_raw_platform_id) {
+    case 0: return CmapSubtablePlatform::Unicode;
+    case 1: return CmapSubtablePlatform::Macintosh;
+    case 2: return CmapSubtablePlatform::ISO;
+    case 3: return CmapSubtablePlatform::Windows;
+    case 4: return CmapSubtablePlatform::Custom;
+    default: ASSERT_NOT_REACHED();
+    }
+}
+
+CmapSubtableFormat CmapSubtable::format() const
+{
+    switch (be_u16(m_slice.offset_pointer(0))) {
+    case 0: return CmapSubtableFormat::ByteEncoding;
+    case 2: return CmapSubtableFormat::HighByte;
+    case 4: return CmapSubtableFormat::SegmentToDelta;
+    case 6: return CmapSubtableFormat::TrimmedTable;
+    case 8: return CmapSubtableFormat::Mixed16And32;
+    case 10: return CmapSubtableFormat::TrimmedArray;
+    case 12: return CmapSubtableFormat::SegmentedCoverage;
+    case 13: return CmapSubtableFormat::ManyToOneRange;
+    case 14: return CmapSubtableFormat::UnicodeVariationSequences;
+    default: ASSERT_NOT_REACHED();
+    }
+}
+
+u32 Cmap::num_subtables() const
+{
+    return be_u16(m_slice.offset_pointer(2));
+}
+
+Optional<CmapSubtable> Cmap::subtable(u32 index) const
+{
+    if (index >= num_subtables()) {
+        return {};
+    }
+    u32 record_offset = 4 + index * 8;
+    u16 platform_id = be_u16(m_slice.offset_pointer(record_offset));
+    u16 encoding_id = be_u16(m_slice.offset_pointer(record_offset + 2));
+    u32 subtable_offset = be_u32(m_slice.offset_pointer(record_offset + 4));
+    ASSERT(subtable_offset < m_slice.size());
+    auto subtable_slice = ByteBuffer::wrap(m_slice.offset_pointer(subtable_offset), m_slice.size() - subtable_offset);
+    return CmapSubtable(move(subtable_slice), platform_id, encoding_id);
+}
+
+// FIXME: This only handles formats 4 (SegmentToDelta) and 12 (SegmentedCoverage) for now.
+u32 CmapSubtable::glyph_id_for_codepoint(u32 codepoint) const
+{
+    switch (format()) {
+    case CmapSubtableFormat::SegmentToDelta:
+        return glyph_id_for_codepoint_table_4(codepoint);
+    case CmapSubtableFormat::SegmentedCoverage:
+        return glyph_id_for_codepoint_table_12(codepoint);
+    default:
+        return 0;
+    }
+}
+
+// Maps a codepoint through a format 4 (segment mapping to delta values) subtable.
+// Returns 0 if the subtable is truncated or does not cover the codepoint.
+u32 CmapSubtable::glyph_id_for_codepoint_table_4(u32 codepoint) const
+{
+    // segCountX2 lives at offset 6, so make sure it is there before reading it.
+    if (m_slice.size() < 8) {
+        return 0;
+    }
+    u32 segcount_x2 = be_u16(m_slice.offset_pointer(6));
+    if (m_slice.size() < segcount_x2 * 4 + 16) {
+        return 0;
+    }
+    for (u32 offset = 0; offset < segcount_x2; offset += 2) {
+        u32 end_codepoint = be_u16(m_slice.offset_pointer(14 + offset));
+        if (codepoint > end_codepoint) {
+            continue;
+        }
+        u32 start_codepoint = be_u16(m_slice.offset_pointer(16 + segcount_x2 + offset));
+        if (codepoint < start_codepoint) {
+            break;
+        }
+        u32 delta = be_u16(m_slice.offset_pointer(16 + segcount_x2 * 2 + offset));
+        u32 range = be_u16(m_slice.offset_pointer(16 + segcount_x2 * 3 + offset));
+        if (range == 0) {
+            return (codepoint + delta) & 0xffff;
+        }
+        // A non-zero range offset is relative to the position of the offset itself.
+        u32 glyph_offset = 16 + segcount_x2 * 3 + offset + range + (codepoint - start_codepoint) * 2;
+        if (glyph_offset + 2 > m_slice.size()) {
+            return 0;
+        }
+        u32 glyph_id = be_u16(m_slice.offset_pointer(glyph_offset));
+        // An entry of 0 means "missing glyph"; the delta must not be applied to it.
+        if (glyph_id == 0) {
+            return 0;
+        }
+        return (glyph_id + delta) & 0xffff;
+    }
+    return 0;
+}
+
+// Maps a codepoint through a format 12 (segmented coverage) subtable.
+// Returns 0 if the subtable is truncated or does not cover the codepoint.
+u32 CmapSubtable::glyph_id_for_codepoint_table_12(u32 codepoint) const
+{
+    // The 16 byte header ends with numGroups at offset 12.
+    if (m_slice.size() < 16) {
+        return 0;
+    }
+    u32 num_groups = be_u32(m_slice.offset_pointer(12));
+    // Compare by division so that a huge numGroups cannot overflow the check.
+    if (num_groups > (m_slice.size() - 16) / 12) {
+        return 0;
+    }
+    for (u32 offset = 0; offset < num_groups * 12; offset += 12) {
+        u32 start_codepoint = be_u32(m_slice.offset_pointer(16 + offset));
+        if (codepoint < start_codepoint) {
+            break;
+        }
+        u32 end_codepoint = be_u32(m_slice.offset_pointer(20 + offset));
+        if (codepoint > end_codepoint) {
+            continue;
+        }
+        u32 glyph_offset = be_u32(m_slice.offset_pointer(24 + offset));
+        return codepoint - start_codepoint + glyph_offset;
+    }
+    return 0;
+}
+
+u32 Cmap::glyph_id_for_codepoint(u32 codepoint) const
+{
+    auto opt_subtable = subtable(m_active_index);
+    if (!opt_subtable.has_value()) {
+        return 0;
+    }
+    auto subtable = opt_subtable.value();
+    return subtable.glyph_id_for_codepoint(codepoint);
+}
+
+OwnPtr<Font> Font::load_from_file(const StringView& path, unsigned index)
 {
     dbg() << "path: " << path << " | index: " << index;
     auto file_or_error = Core::File::open(String(path), Core::IODevice::ReadOnly);
@@ -120,23 +297,27 @@ OwnPtr<TTFont> TTFont::load_from_file(const StringView& path, unsigned index)
             return nullptr;
         }
         u32 offset = be_u32(buffer.offset_pointer(12 + 4 * index));
-        return OwnPtr<TTFont>(new TTFont(move(buffer), offset));
+        return OwnPtr<Font>(new Font(move(buffer), offset));
     } else if (tag == tag_from_str("OTTO")) {
         dbg() << "CFF fonts not supported yet";
         return nullptr;
     } else if (tag != 0x00010000) {
-        dbg() << "Not a valid TTF font";
+        dbg() << "Not a valid font";
         return nullptr;
     } else {
-        return OwnPtr<TTFont>(new TTFont(move(buffer), 0));
+        return OwnPtr<Font>(new Font(move(buffer), 0));
     }
 }

-TTFont::TTFont(AK::ByteBuffer&& buffer, u32 offset)
+Font::Font(AK::ByteBuffer&& buffer, u32 offset)
     : m_buffer(move(buffer))
 {
     ASSERT(m_buffer.size() >= offset + 12);
-    bool head_has_been_initialized = false;
+    Optional<ByteBuffer> head_slice = {};
+    Optional<ByteBuffer> hhea_slice = {};
+    Optional<ByteBuffer> maxp_slice = {};
+    Optional<ByteBuffer> hmtx_slice = {};
+    Optional<ByteBuffer> cmap_slice = {};

     //auto sfnt_version = be_u32(data + offset);
     auto num_tables = be_u16(m_buffer.offset_pointer(offset + 4));
@@ -148,16 +329,60 @@ TTFont::TTFont(AK::ByteBuffer&& buffer, u32 offset)
         u32 table_offset = be_u32(m_buffer.offset_pointer(record_offset + 8));
         u32 table_length = be_u32(m_buffer.offset_pointer(record_offset + 12));
         ASSERT(m_buffer.size() >= table_offset + table_length);
+        auto buffer = ByteBuffer::wrap(m_buffer.offset_pointer(table_offset), table_length);

-        // Get the tables we need
+        // Get the table offsets we need.
         if (tag == tag_from_str("head")) {
-            auto buffer = ByteBuffer::wrap(m_buffer.offset_pointer(table_offset), table_length);
-            m_head = TTFHead(move(buffer));
-            head_has_been_initialized = true;
+            head_slice = move(buffer);
+        } else if (tag == tag_from_str("hhea")) {
+            hhea_slice = move(buffer);
+        } else if (tag == tag_from_str("maxp")) {
+            maxp_slice = move(buffer);
+        } else if (tag == tag_from_str("hmtx")) {
+            hmtx_slice = move(buffer);
+        } else if (tag == tag_from_str("cmap")) {
+            cmap_slice = move(buffer);
         }
     }

-    // Check that we've got everything we need
-    ASSERT(head_has_been_initialized);
+    // Check that we've got everything we need.
+    ASSERT(head_slice.has_value());
+    ASSERT(hhea_slice.has_value());
+    ASSERT(maxp_slice.has_value());
+    ASSERT(hmtx_slice.has_value());
+    ASSERT(cmap_slice.has_value());
+
+    // Load the tables.
+    m_head = Head(move(head_slice.value()));
+    m_hhea = Hhea(move(hhea_slice.value()));
+    m_maxp = Maxp(move(maxp_slice.value()));
+    m_hmtx = Hmtx(move(hmtx_slice.value()), m_maxp.num_glyphs(), m_hhea.number_of_h_metrics());
+    m_cmap = Cmap(move(cmap_slice.value()));
+
+    // Select cmap table. FIXME: Do this better. Right now, just looks for platform "Windows"
+    // and corresponding encoding "Unicode full repertoire", or failing that, "Unicode BMP"
+    bool found_bmp_subtable = false;
+    for (u32 i = 0; i < m_cmap.num_subtables(); i++) {
+        auto opt_subtable = m_cmap.subtable(i);
+        if (!opt_subtable.has_value()) {
+            continue;
+        }
+        auto subtable = opt_subtable.value();
+        if (subtable.platform_id() == CmapSubtablePlatform::Windows) {
+            if (subtable.encoding_id() == 10) {
+                m_cmap.set_active_index(i);
+                break;
+            }
+            // Remember the first "Unicode BMP" subtable, but keep scanning for the preferred one.
+            if (subtable.encoding_id() == 1 && !found_bmp_subtable) {
+                m_cmap.set_active_index(i);
+                found_bmp_subtable = true;
+            }
+        }
+    }
+
+    dbg() << "Glyph ID for 'A': " << m_cmap.glyph_id_for_codepoint('A');
+    dbg() << "Glyph ID for 'B': " << m_cmap.glyph_id_for_codepoint('B');
 }
 }
+}
diff --git a/Libraries/LibGfx/TTFont.h b/Libraries/LibGfx/TTFont.h
index 769e7b519b..e75954c2cb 100644
--- a/Libraries/LibGfx/TTFont.h
+++ b/Libraries/LibGfx/TTFont.h
@@ -32,28 +32,22 @@
 #include

 namespace Gfx {
+namespace TTF {

-class TTFont;
+class Font;

-enum class TTFIndexToLocFormat {
+enum class IndexToLocFormat {
     Offset16,
     Offset32,
 };

-class TTFHead {
+class Head {
 private:
-    TTFHead() {}
-    TTFHead(ByteBuffer&& slice)
+    Head() {}
+    Head(ByteBuffer&& slice)
         : m_slice(move(slice))
     {
         ASSERT(m_slice.size() >= 54);
-        dbg() << "HEAD:"
-              << "\n units_per_em: " << units_per_em()
-              << "\n xmin: " << xmin()
-              << "\n ymin: " << ymin()
-              << "\n xmax: " << xmax()
-              << "\n ymax: " << ymax()
-              << "\n lowest_recommended_ppem: " << lowest_recommended_ppem();
     }
     u16 units_per_em() const;
     i16 xmin() const;
@@ -61,23 +55,148 @@ private:
     i16 xmax() const;
     i16 ymax() const;
     u16 lowest_recommended_ppem() const;
-    Result<TTFIndexToLocFormat, i16> index_to_loc_format() const;
+    Result<IndexToLocFormat, i16> index_to_loc_format() const;

     ByteBuffer m_slice;
-    bool m_is_init;

-    friend TTFont;
+    friend Font;
 };

-class TTFont {
+class Hhea {
+private:
+    Hhea() {}
+    Hhea(ByteBuffer&& slice)
+        : m_slice(move(slice))
+    {
+        ASSERT(m_slice.size() >= 36);
+    }
+    u16 number_of_h_metrics() const;
+
+    ByteBuffer m_slice;
+
+    friend Font;
+};
+
+class Maxp {
+private:
+    Maxp() {}
+    Maxp(ByteBuffer&& slice)
+        : m_slice(move(slice))
+    {
+        ASSERT(m_slice.size() >= 6);
+    }
+    u16 num_glyphs() const;
+
+    ByteBuffer m_slice;
+
+    friend Font;
+};
+
+struct GlyphHorizontalMetrics {
+    u16 advance_width;
+    i16 left_side_bearing;
+};
+
+class Hmtx {
+private:
+    Hmtx() {}
+    Hmtx(ByteBuffer&& slice, u32 num_glyphs, u32 number_of_h_metrics)
+        : m_slice(move(slice))
+        , m_num_glyphs(num_glyphs)
+        , m_number_of_h_metrics(number_of_h_metrics)
+    {
+        ASSERT(m_slice.size() >= number_of_h_metrics * 2 + num_glyphs * 2);
+    }
+    GlyphHorizontalMetrics get_glyph_horizontal_metrics(u32 glyph_id) const;
+
+    ByteBuffer m_slice;
+    u32 m_num_glyphs;
+    u32 m_number_of_h_metrics;
+
+    friend Font;
+};
+
+enum class CmapSubtablePlatform {
+    Unicode,
+    Macintosh,
+    Windows,
+    Custom,
+    ISO, // Platform ID 2: deprecated, but fonts may still carry it.
+};
+
+enum class CmapSubtableFormat {
+    ByteEncoding,
+    HighByte,
+    SegmentToDelta,
+    TrimmedTable,
+    Mixed16And32,
+    TrimmedArray,
+    SegmentedCoverage,
+    ManyToOneRange,
+    UnicodeVariationSequences,
+};
+
+class Cmap;
+
+class CmapSubtable {
 public:
-    static OwnPtr<TTFont> load_from_file(const StringView& path, unsigned index);
+    CmapSubtablePlatform platform_id() const;
+    u16 encoding_id() const { return m_encoding_id; }
+    CmapSubtableFormat format() const;

 private:
-    TTFont(AK::ByteBuffer&& buffer, u32 offset);
+    CmapSubtable(ByteBuffer&& slice, u16 platform_id, u16 encoding_id)
+        : m_slice(move(slice))
+        , m_raw_platform_id(platform_id)
+        , m_encoding_id(encoding_id)
+    {
+    }
+    // Returns 0 if glyph not found. This corresponds to the "missing glyph"
+    u32 glyph_id_for_codepoint(u32 codepoint) const;
+    u32 glyph_id_for_codepoint_table_4(u32 codepoint) const;
+    u32 glyph_id_for_codepoint_table_12(u32 codepoint) const;
+
+    ByteBuffer m_slice;
+    u16 m_raw_platform_id;
+    u16 m_encoding_id;
+
+    friend Cmap;
+};
+
+class Cmap {
+private:
+    Cmap() {}
+    Cmap(ByteBuffer&& slice)
+        : m_slice(move(slice))
+    {
+        ASSERT(m_slice.size() > 4);
+    }
+    u32 num_subtables() const;
+    Optional<CmapSubtable> subtable(u32 index) const;
+    void set_active_index(u32 index) { m_active_index = index; }
+    // Returns 0 if glyph not found. This corresponds to the "missing glyph"
+    u32 glyph_id_for_codepoint(u32 codepoint) const;
+
+    ByteBuffer m_slice;
+    u32 m_active_index { UINT32_MAX };
+
+    friend Font;
+};
+
+class Font {
+public:
+    static OwnPtr<Font> load_from_file(const StringView& path, unsigned index);
+
+private:
+    Font(AK::ByteBuffer&& buffer, u32 offset);

     AK::ByteBuffer m_buffer;
-    TTFHead m_head;
+    Head m_head;
+    Hhea m_hhea;
+    Maxp m_maxp;
+    Hmtx m_hmtx;
+    Cmap m_cmap;
 };

 }
+}