mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 11:12:45 +00:00 
			
		
		
		
	 f8a0365002
			
		
	
	
		f8a0365002
		
	
	
	
	
		
			
			This was preventing some unqualified emoji sequences from rendering
properly, such as the custom SerenityOS flag. We rendered the flag
correctly when given the fully qualified sequence:
    U+1F3F3 U+FEOF U+200D U+1F41E
But were not detecting the unqualified sequence as an emoji when also
filtering for emoji-presentation sequences:
    U+1F3F3 U+200D U+1F41E
		
	
			
		
			
				
	
	
		
			126 lines
		
	
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			126 lines
		
	
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2022-2023, Tim Flynn <trflynn89@serenityos.org>
 | |
|  *
 | |
|  * SPDX-License-Identifier: BSD-2-Clause
 | |
|  */
 | |
| 
 | |
| #include <AK/CharacterTypes.h>
 | |
| #include <AK/Utf32View.h>
 | |
| #include <AK/Utf8View.h>
 | |
| #include <LibUnicode/CharacterTypes.h>
 | |
| #include <LibUnicode/Emoji.h>
 | |
| 
 | |
| #if ENABLE_UNICODE_DATA
 | |
| #    include <LibUnicode/UnicodeData.h>
 | |
| #endif
 | |
| 
 | |
| namespace Unicode {
 | |
| 
 | |
| Optional<Emoji> __attribute__((weak)) find_emoji_for_code_points(ReadonlySpan<u32>) { return {}; }
 | |
| 
 | |
| #if ENABLE_UNICODE_DATA
 | |
| 
 | |
| // https://unicode.org/reports/tr51/#def_emoji_core_sequence
 | |
| static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional<u32> const& next_code_point, SequenceType type)
 | |
| {
 | |
|     // emoji_core_sequence := emoji_character | emoji_presentation_sequence | emoji_keycap_sequence | emoji_modifier_sequence | emoji_flag_sequence
 | |
| 
 | |
|     static constexpr auto emoji_presentation_selector = 0xFE0Fu;
 | |
|     static constexpr auto combining_enclosing_keycap = 0x20E3u;
 | |
|     static constexpr auto zero_width_joiner = 0x200Du;
 | |
| 
 | |
|     // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
 | |
|     // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
 | |
|     if (is_ascii_digit(code_point) || code_point == '#' || code_point == '*')
 | |
|         return next_code_point == emoji_presentation_selector || next_code_point == combining_enclosing_keycap;
 | |
| 
 | |
|     // A little non-standard, but all other ASCII code points are not the beginning of any emoji sequence.
 | |
|     if (is_ascii(code_point))
 | |
|         return false;
 | |
| 
 | |
|     // https://unicode.org/reports/tr51/#def_emoji_character
 | |
|     switch (type) {
 | |
|     case SequenceType::Any:
 | |
|         if (code_point_has_property(code_point, Property::Emoji))
 | |
|             return true;
 | |
|         break;
 | |
|     case SequenceType::EmojiPresentation:
 | |
|         if (code_point_has_property(code_point, Property::Emoji_Presentation))
 | |
|             return true;
 | |
|         if (next_code_point == zero_width_joiner && code_point_has_property(code_point, Property::Emoji))
 | |
|             return true;
 | |
|         break;
 | |
|     }
 | |
| 
 | |
|     // https://unicode.org/reports/tr51/#def_emoji_presentation_sequence
 | |
|     // emoji_presentation_sequence := emoji_character emoji_presentation_selector
 | |
|     if (next_code_point == emoji_presentation_selector)
 | |
|         return true;
 | |
| 
 | |
|     // https://unicode.org/reports/tr51/#def_emoji_modifier_sequence
 | |
|     // emoji_modifier_sequence := emoji_modifier_base emoji_modifier
 | |
|     if (code_point_has_property(code_point, Property::Emoji_Modifier_Base))
 | |
|         return true;
 | |
| 
 | |
|     // https://unicode.org/reports/tr51/#def_emoji_flag_sequence
 | |
|     // emoji_flag_sequence := regional_indicator regional_indicator
 | |
|     if (code_point_has_property(code_point, Property::Regional_Indicator))
 | |
|         return true;
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool could_be_start_of_serenity_emoji(u32 code_point)
 | |
| {
 | |
|     // We use Supplementary Private Use Area-B for custom Serenity emoji, starting at U+10CD00.
 | |
|     static constexpr auto first_custom_serenity_emoji_code_point = 0x10CD00u;
 | |
| 
 | |
|     return code_point >= first_custom_serenity_emoji_code_point;
 | |
| }
 | |
| 
 | |
| #endif
 | |
| 
 | |
| // https://unicode.org/reports/tr51/#def_emoji_sequence
 | |
| template<typename CodePointIterator>
 | |
| static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it, [[maybe_unused]] SequenceType type)
 | |
| {
 | |
|     // emoji_sequence := emoji_core_sequence | emoji_zwj_sequence | emoji_tag_sequence
 | |
| 
 | |
|     if (it.done())
 | |
|         return false;
 | |
| 
 | |
| #if ENABLE_UNICODE_DATA
 | |
|     // The purpose of this method is to quickly filter out code points that cannot be the start of
 | |
|     // an emoji. The emoji_core_sequence definition alone captures the start of all possible
 | |
|     // emoji_zwj_sequence and emoji_tag_sequence emojis, because:
 | |
|     //
 | |
|     //     * emoji_zwj_sequence must begin with emoji_zwj_element, which is:
 | |
|     //       emoji_zwj_element := emoji_core_sequence | emoji_tag_sequence
 | |
|     //
 | |
|     //     * emoji_tag_sequence must begin with tag_base, which is:
 | |
|     //       tag_base := emoji_character | emoji_modifier_sequence | emoji_presentation_sequence
 | |
|     //       Note that this is a subset of emoji_core_sequence.
 | |
|     auto code_point = *it;
 | |
|     auto next_code_point = it.peek(1);
 | |
| 
 | |
|     if (could_be_start_of_emoji_core_sequence(code_point, next_code_point, type))
 | |
|         return true;
 | |
|     if (could_be_start_of_serenity_emoji(code_point))
 | |
|         return true;
 | |
|     return false;
 | |
| #else
 | |
|     return true;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const& it, SequenceType type)
 | |
| {
 | |
|     return could_be_start_of_emoji_sequence_impl(it, type);
 | |
| }
 | |
| 
 | |
| bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const& it, SequenceType type)
 | |
| {
 | |
|     return could_be_start_of_emoji_sequence_impl(it, type);
 | |
| }
 | |
| 
 | |
| }
 |