1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 11:18:11 +00:00
serenity/Userland/Libraries/LibGfx/Font/Emoji.cpp
Ryan Liptak 379baa984d LibGfx: Always lookup emojis without emoji presentation specifiers
This allows us to treat unqualified, minimally-qualified, and
fully-qualified emojis the same as long as emoji filenames are in their
least qualified form (with respect to emoji presentation).

For example, the transgender flag emoji has 4 possible forms:

    1F3F3 FE0F 200D 26A7 FE0F ; fully-qualified  # 🏳️‍⚧️
    1F3F3 200D 26A7 FE0F      ; unqualified      # 🏳‍⚧️
    1F3F3 FE0F 200D 26A7      ; unqualified      # 🏳️‍⚧
    1F3F3 200D 26A7           ; unqualified      # 🏳‍⚧

In order to treat them all as the same, we now drop all forms down
to 1F3F3 200D 26A7 (skipping any FE0F codepoints) and then do the
lookup for that form.
2022-08-20 23:50:41 +01:00

133 lines
5.2 KiB
C++

/*
* Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
* Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/HashMap.h>
#include <AK/Span.h>
#include <AK/String.h>
#include <AK/Utf8View.h>
#include <LibGfx/Bitmap.h>
#include <LibGfx/Font/Emoji.h>
namespace Gfx {
// https://unicode.org/reports/tr51/
// https://unicode.org/emoji/charts/emoji-list.html
// https://unicode.org/emoji/charts/emoji-zwj-sequences.html
// File-local cache of emoji bitmaps, keyed by the '_'-joined "U+XXXX" basename that
// emoji_for_code_points() builds from a code point sequence. A null value records that
// loading the bitmap for that basename failed, so later lookups return null without
// trying to load the file again.
static HashMap<String, RefPtr<Gfx::Bitmap>> s_emojis;
// Convenience overload: looks up the emoji bitmap for a single code point by
// forwarding it as a one-element sequence to emoji_for_code_points().
// Returns whatever that lookup returns (null when no bitmap could be loaded).
Bitmap const* Emoji::emoji_for_code_point(u32 code_point)
{
    return emoji_for_code_points(Array<u32, 1> { code_point });
}
// Returns the bitmap for the given code point sequence, or null if none could be loaded.
//
// The sequence is turned into a basename of the form "U+XXXX_U+YYYY..." which is used both
// as the key into s_emojis and as the file name under /res/emoji/. Results are cached,
// including failures (stored as a null entry), so each basename is loaded at most once.
Bitmap const* Emoji::emoji_for_code_points(Span<u32> const& code_points)
{
    // FIXME: This function is definitely not fast.
    auto const cache_key = String::join('_', code_points, "U+{:X}"sv);

    // Serve from the cache when we have already seen this sequence; the cached
    // value may be null if a previous load attempt failed.
    if (auto cached = s_emojis.find(cache_key); cached != s_emojis.end())
        return cached->value.ptr();

    auto load_result = Bitmap::try_load_from_file(String::formatted("/res/emoji/{}.png", cache_key));
    if (!load_result.is_error()) {
        // The cache keeps its own reference, so the returned raw pointer stays valid
        // after the local reference goes out of scope.
        auto loaded_bitmap = load_result.release_value();
        s_emojis.set(cache_key, loaded_bitmap);
        return loaded_bitmap.ptr();
    }

    // Remember the failure so we don't try to load this file again.
    s_emojis.set(cache_key, nullptr);
    return nullptr;
}
// Finds the longest emoji sequence starting at the iterator's current position.
//
// Peeks at up to 10 upcoming code points, looking up the accumulated sequence (with all
// U+FE0F emoji presentation selectors dropped) after every non-selector code point.
// If at least one lookup succeeds, the bitmap of the longest match is returned and the
// iterator is advanced so that it sits on the last code point of that match; otherwise
// null is returned and the iterator is left untouched.
Bitmap const* Emoji::emoji_for_code_point_iterator(Utf8CodePointIterator& it)
{
    // NOTE: This could likely be more efficient, e.g. by checking whether each code point
    //       falls into a plausible range (emojis, modifiers, variation selectors, ZWJ) and
    //       bailing out early if not. As written, up to 10 lookups are performed for any
    //       sequence of code points.
    constexpr size_t max_emoji_code_point_sequence_length = 10;
    constexpr u32 emoji_presentation_selector = 0xFE0F;

    struct Candidate {
        Bitmap const* emoji;
        Span<u32> code_points;
        // Number of code points the match covers in the iterator, selectors included.
        u8 real_codepoint_length;
    };

    // Code points seen so far, with emoji presentation selectors left out.
    Vector<u32, max_emoji_code_point_sequence_length> stripped_code_points;
    Vector<Candidate, max_emoji_code_point_sequence_length> candidates;

    // Collect every prefix that resolves to an emoji, up to the longest supported
    // sequence length or until the iterator has no more code points.
    bool previous_lookup_succeeded = false;
    for (u8 offset = 0; offset < max_emoji_code_point_sequence_length; ++offset) {
        auto peeked = it.peek(offset);
        if (!peeked.has_value())
            break;

        // Emoji presentation selectors (U+FE0F) are never part of the lookup key.
        //
        // For a single emoji core sequence, emoji presentation is assumed to be implied:
        // this function is only expected to be called for characters with default text
        // presentation when the character is missing from the font or is followed by an
        // explicit emoji presentation selector.
        //
        // For emoji ZWJ sequences, minimally-qualified and unqualified forms are treated
        // the same as the fully-qualified form, which
        // https://unicode.org/reports/tr51/#Emoji_Implementation_Notes leaves up to
        // the implementation.
        //
        // Dropping the selector reduces every form to the same key, so all of them resolve
        // to one file as long as emoji filenames omit U+FE0F as well. This only concerns
        // emoji presentation, not other emoji modifiers.
        if (peeked.value() == emoji_presentation_selector) {
            // The key is unchanged, so there is nothing new to look up. If the key as it
            // stands was a match, that match simply covers one more real code point.
            if (previous_lookup_succeeded)
                candidates.last().real_codepoint_length++;
            continue;
        }

        stripped_code_points.append(peeked.value());

        auto const* bitmap = emoji_for_code_points(stripped_code_points);
        previous_lookup_succeeded = bitmap != nullptr;
        if (previous_lookup_succeeded) {
            u8 consumed_code_points = offset + 1;
            candidates.empend(bitmap, stripped_code_points, consumed_code_points);
        }
    }

    if (candidates.is_empty())
        return nullptr;

    // Matches were recorded in order of increasing length, so the last one is the
    // longest. For example:
    // U+1F3F3 - white flag
    // U+1F3F3 U+200D U+1F308 - rainbow flag (unqualified form)
    auto const& longest_match = candidates.last();

    // Leave the iterator on the final code point of the match; whoever is iterating
    // will step past it to reach the next new code point.
    for (u8 step = 0; step < longest_match.real_codepoint_length - 1; ++step)
        ++it;

    return longest_match.emoji;
}
}