From 42c272c059a7f44daa8bcb94d3eb9517084f773a Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Tue, 28 Feb 2023 08:02:02 -0500 Subject: [PATCH] LibUnicode: Allow ignoring text presentation emoji in sequence detection This adds an option to only detect emoji that should always present as emoji. For example, the copyright symbol (unless followed by an emoji presentation selector) should render as text. --- Userland/Libraries/LibUnicode/Emoji.cpp | 26 ++++++++++++++++--------- Userland/Libraries/LibUnicode/Emoji.h | 9 +++++++-- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Userland/Libraries/LibUnicode/Emoji.cpp b/Userland/Libraries/LibUnicode/Emoji.cpp index 32e45d9351..c6bbad1f36 100644 --- a/Userland/Libraries/LibUnicode/Emoji.cpp +++ b/Userland/Libraries/LibUnicode/Emoji.cpp @@ -21,7 +21,7 @@ Optional __attribute__((weak)) find_emoji_for_code_points(ReadonlySpan const& next_code_point) +static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional const& next_code_point, SequenceType type) { // emoji_core_sequence := emoji_character | emoji_presentation_sequence | emoji_keycap_sequence | emoji_modifier_sequence | emoji_flag_sequence @@ -38,8 +38,16 @@ static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional return false; // https://unicode.org/reports/tr51/#def_emoji_character - if (code_point_has_property(code_point, Property::Emoji)) - return true; + switch (type) { + case SequenceType::Any: + if (code_point_has_property(code_point, Property::Emoji)) + return true; + break; + case SequenceType::EmojiPresentation: + if (code_point_has_property(code_point, Property::Emoji_Presentation)) + return true; + break; + } // https://unicode.org/reports/tr51/#def_emoji_presentation_sequence // emoji_presentation_sequence := emoji_character emoji_presentation_selector @@ -71,7 +79,7 @@ static bool could_be_start_of_serenity_emoji(u32 code_point) // https://unicode.org/reports/tr51/#def_emoji_sequence template -static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it) +static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it, [[maybe_unused]] SequenceType type) { // emoji_sequence := emoji_core_sequence | emoji_zwj_sequence | emoji_tag_sequence @@ -92,7 +100,7 @@ static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it) auto code_point = *it; auto next_code_point = it.peek(1); - if (could_be_start_of_emoji_core_sequence(code_point, next_code_point)) + if (could_be_start_of_emoji_core_sequence(code_point, next_code_point, type)) return true; if (could_be_start_of_serenity_emoji(code_point)) return true; @@ -102,14 +110,14 @@ static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it) #endif } -bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const& it) +bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const& it, SequenceType type) { - return could_be_start_of_emoji_sequence_impl(it); + return could_be_start_of_emoji_sequence_impl(it, type); } -bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const& it) +bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const& it, SequenceType type) { - return could_be_start_of_emoji_sequence_impl(it); + return could_be_start_of_emoji_sequence_impl(it, type); } } diff --git a/Userland/Libraries/LibUnicode/Emoji.h b/Userland/Libraries/LibUnicode/Emoji.h index 7e772e9c3e..0104cee693 100644 --- a/Userland/Libraries/LibUnicode/Emoji.h +++ b/Userland/Libraries/LibUnicode/Emoji.h @@ -47,8 +47,13 @@ Optional find_emoji_for_code_points(u32 const (&code_points)[Size]) return find_emoji_for_code_points(ReadonlySpan { code_points }); } -bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const&); -bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const&); +enum class SequenceType { + Any, + EmojiPresentation, +}; + +bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const&, SequenceType = SequenceType::Any); +bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const&, SequenceType = SequenceType::Any); constexpr StringView emoji_group_to_string(EmojiGroup group) {