From b7ef36aa3647a86a43af8264d0afaa59f3a7eaef Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Fri, 9 Sep 2022 09:51:03 -0400 Subject: [PATCH] LibUnicode: Parse and generate custom emoji added for SerenityOS Parse emoji from emoji-serenity.txt to allow displaying their names and grouping them together in the EmojiInputDialog. This also adds an "Unknown" value to the EmojiGroup enum. This will be useful for emoji that aren't found in the UCD, or for when UCD downloads are disabled. --- Meta/CMake/unicode_data.cmake | 3 +- .../LibUnicode/GenerateEmojiData.cpp | 56 +++++++++++++++++++ Userland/Libraries/LibUnicode/Emoji.h | 15 ++++- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake index 2751035068..0859242f32 100644 --- a/Meta/CMake/unicode_data.cmake +++ b/Meta/CMake/unicode_data.cmake @@ -63,6 +63,7 @@ set(EMOJI_TEST_URL "https://unicode.org/Public/emoji/${EMOJI_VERSION}/emoji-test set(EMOJI_TEST_PATH "${UCD_PATH}/emoji-test.txt") set(EMOJI_GENERATOR_PATH "${SerenityOS_SOURCE_DIR}/Meta/generate-emoji-txt.sh") set(EMOJI_RES_PATH "${SerenityOS_SOURCE_DIR}/Base/res/emoji") +set(EMOJI_SERENITY_PATH "${SerenityOS_SOURCE_DIR}/Base/home/anon/Documents/emoji-serenity.txt") set(EMOJI_INSTALL_PATH "${CMAKE_BINARY_DIR}/Root/home/anon/Documents/emoji.txt") if (ENABLE_UNICODE_DATABASE_DOWNLOAD) @@ -123,7 +124,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${UNICODE_META_TARGET_PREFIX}" "${EMOJI_DATA_HEADER}" "${EMOJI_DATA_IMPLEMENTATION}" - arguments -e "${EMOJI_TEST_PATH}" + arguments -e "${EMOJI_TEST_PATH}" -s "${EMOJI_SERENITY_PATH}" ) if (CMAKE_CURRENT_BINARY_DIR MATCHES ".*/LibUnicode") # Serenity build. diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp index 3fe2b895e2..6a9d53974c 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp @@ -5,6 +5,7 @@ */ #include "GeneratorUtil.h" +#include #include #include #include @@ -87,6 +88,57 @@ static ErrorOr parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo return {}; } +static ErrorOr parse_emoji_serenity_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data) +{ + static constexpr auto code_point_header = "U+"sv; + + Array buffer; + + auto display_order = static_cast(emoji_data.emojis.size()) + 1u; + + while (TRY(file.can_read_line())) { + auto line = TRY(file.read_line(buffer)); + if (line.is_empty()) + continue; + + auto index = line.find(code_point_header); + if (!index.has_value()) + continue; + + line = line.substring_view(*index); + StringBuilder builder; + + Emoji emoji {}; + emoji.group = Unicode::EmojiGroup::SerenityOS; + emoji.display_order = display_order++; + + line.for_each_split_view(' ', false, [&](auto segment) { + if (segment.starts_with(code_point_header)) { + segment = segment.substring_view(code_point_header.length()); + + auto code_point = AK::StringUtils::convert_to_uint_from_hex(segment); + VERIFY(code_point.has_value()); + + emoji.code_points.append(*code_point); + } else { + if (!builder.is_empty()) + builder.append(' '); + builder.append(segment); + } + }); + + auto name = builder.build(); + if (!any_of(name, is_ascii_lower_alpha)) + name = name.to_titlecase(); + + emoji.name = emoji_data.unique_strings.ensure(move(name)); + emoji.code_points_name = String::join('_', emoji.code_points); + TRY(emoji_data.emojis.try_append(move(emoji))); + } + + return {}; +} + static ErrorOr generate_emoji_data_header(Core::Stream::BufferedFile& file, EmojiData const&) { StringBuilder builder; @@ -194,19 +246,23 @@ ErrorOr serenity_main(Main::Arguments arguments) StringView generated_header_path; StringView generated_implementation_path; StringView emoji_test_path; + StringView emoji_serenity_path; Core::ArgsParser args_parser; args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path"); args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path"); + args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path"); args_parser.parse(arguments); auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write)); auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write)); auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read)); + auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read)); EmojiData emoji_data {}; TRY(parse_emoji_test_data(*emoji_test_file, emoji_data)); + TRY(parse_emoji_serenity_data(*emoji_serenity_file, emoji_data)); TRY(generate_emoji_data_header(*generated_header_file, emoji_data)); TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data)); diff --git a/Userland/Libraries/LibUnicode/Emoji.h b/Userland/Libraries/LibUnicode/Emoji.h index ca5d81b839..65fb225cef 100644 --- a/Userland/Libraries/LibUnicode/Emoji.h +++ b/Userland/Libraries/LibUnicode/Emoji.h @@ -13,6 +13,8 @@ namespace Unicode { enum class EmojiGroup : u8 { + Unknown, + SmileysAndEmotion, PeopleAndBody, Component, @@ -23,11 +25,14 @@ enum class EmojiGroup : u8 { Objects, Symbols, Flags, + + // Non-standard emoji added for SerenityOS: + SerenityOS, }; struct Emoji { StringView name; - EmojiGroup group; + EmojiGroup group { EmojiGroup::Unknown }; u32 display_order { 0 }; Span code_points; }; @@ -43,6 +48,8 @@ Optional find_emoji_for_code_points(u32 const (&code_points)[Size]) constexpr StringView emoji_group_to_string(EmojiGroup group) { switch (group) { + case EmojiGroup::Unknown: + return "Unknown"sv; case EmojiGroup::SmileysAndEmotion: return "Smileys & Emotion"sv; case EmojiGroup::PeopleAndBody: @@ -63,6 +70,8 @@ constexpr StringView emoji_group_to_string(EmojiGroup group) return "Symbols"sv; case EmojiGroup::Flags: return "Flags"sv; + case EmojiGroup::SerenityOS: + return "SerenityOS"sv; } VERIFY_NOT_REACHED(); @@ -70,6 +79,8 @@ constexpr StringView emoji_group_to_string(EmojiGroup group) constexpr EmojiGroup emoji_group_from_string(StringView group) { + if (group == "Unknown"sv) + return EmojiGroup::Unknown; if (group == "Smileys & Emotion"sv) return EmojiGroup::SmileysAndEmotion; if (group == "People & Body"sv) @@ -90,6 +101,8 @@ constexpr EmojiGroup emoji_group_from_string(StringView group) return EmojiGroup::Symbols; if (group == "Flags"sv) return EmojiGroup::Flags; + if (group == "SerenityOS"sv) + return EmojiGroup::SerenityOS; VERIFY_NOT_REACHED(); }