mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 20:52:45 +00:00 
			
		
		
		
	Meta: Replace Bash script for generating emoji.txt with C++ generator
We currently have two build-time parsers for the UCD's emoji-test.txt file. To prepare for future changes, this removes the Bash parser and moves its functionality to the newer C++ parser.
This commit is contained in:
		
							parent
							
								
									b5a876e606
								
							
						
					
					
						commit
						bd592480e4
					
				
					 3 changed files with 101 additions and 94 deletions
				
			
		|  | @ -61,7 +61,6 @@ set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/${SENTENCE_BREAK_PROP_SOURCE}") | ||||||
| string(REGEX REPLACE "([0-9]+\\.[0-9]+)\\.[0-9]+" "\\1" EMOJI_VERSION "${UCD_VERSION}") | string(REGEX REPLACE "([0-9]+\\.[0-9]+)\\.[0-9]+" "\\1" EMOJI_VERSION "${UCD_VERSION}") | ||||||
| set(EMOJI_TEST_URL "https://unicode.org/Public/emoji/${EMOJI_VERSION}/emoji-test.txt") | set(EMOJI_TEST_URL "https://unicode.org/Public/emoji/${EMOJI_VERSION}/emoji-test.txt") | ||||||
| set(EMOJI_TEST_PATH "${UCD_PATH}/emoji-test.txt") | set(EMOJI_TEST_PATH "${UCD_PATH}/emoji-test.txt") | ||||||
| set(EMOJI_GENERATOR_PATH "${SerenityOS_SOURCE_DIR}/Meta/generate-emoji-txt.sh") |  | ||||||
| set(EMOJI_RES_PATH "${SerenityOS_SOURCE_DIR}/Base/res/emoji") | set(EMOJI_RES_PATH "${SerenityOS_SOURCE_DIR}/Base/res/emoji") | ||||||
| set(EMOJI_SERENITY_PATH "${SerenityOS_SOURCE_DIR}/Base/home/anon/Documents/emoji-serenity.txt") | set(EMOJI_SERENITY_PATH "${SerenityOS_SOURCE_DIR}/Base/home/anon/Documents/emoji-serenity.txt") | ||||||
| set(EMOJI_INSTALL_PATH "${CMAKE_BINARY_DIR}/Root/home/anon/Documents/emoji.txt") | set(EMOJI_INSTALL_PATH "${CMAKE_BINARY_DIR}/Root/home/anon/Documents/emoji.txt") | ||||||
|  | @ -96,6 +95,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) | ||||||
|     set(EMOJI_DATA_HEADER EmojiData.h) |     set(EMOJI_DATA_HEADER EmojiData.h) | ||||||
|     set(EMOJI_DATA_IMPLEMENTATION EmojiData.cpp) |     set(EMOJI_DATA_IMPLEMENTATION EmojiData.cpp) | ||||||
| 
 | 
 | ||||||
|  |     if (SERENITYOS) | ||||||
|  |         set(EMOJI_INSTALL_ARG -i "${EMOJI_INSTALL_PATH}") | ||||||
|  |     endif() | ||||||
|  | 
 | ||||||
|     invoke_generator( |     invoke_generator( | ||||||
|         "UnicodeData" |         "UnicodeData" | ||||||
|         Lagom::GenerateUnicodeData |         Lagom::GenerateUnicodeData | ||||||
|  | @ -110,22 +113,14 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) | ||||||
|         "${UCD_VERSION_FILE}" |         "${UCD_VERSION_FILE}" | ||||||
|         "${EMOJI_DATA_HEADER}" |         "${EMOJI_DATA_HEADER}" | ||||||
|         "${EMOJI_DATA_IMPLEMENTATION}" |         "${EMOJI_DATA_IMPLEMENTATION}" | ||||||
|         arguments -e "${EMOJI_TEST_PATH}" -s "${EMOJI_SERENITY_PATH}" |         arguments "${EMOJI_INSTALL_ARG}" -e "${EMOJI_TEST_PATH}" -s "${EMOJI_SERENITY_PATH}" -r "${EMOJI_RES_PATH}" | ||||||
|     ) |  | ||||||
| 
 | 
 | ||||||
|     if (SERENITYOS) |  | ||||||
|         add_custom_command( |  | ||||||
|             OUTPUT "${EMOJI_INSTALL_PATH}" |  | ||||||
|             COMMAND "${EMOJI_GENERATOR_PATH}" "${EMOJI_TEST_PATH}" "${EMOJI_RES_PATH}" "${EMOJI_INSTALL_PATH}" |  | ||||||
|         # This will make this command only run when the modified time of the directory changes, |         # This will make this command only run when the modified time of the directory changes, | ||||||
|         # which only happens if files within it are added or deleted, but not when a file is modified. |         # which only happens if files within it are added or deleted, but not when a file is modified. | ||||||
|         # This is fine for this use-case, because the contents of a file changing should not affect |         # This is fine for this use-case, because the contents of a file changing should not affect | ||||||
|         # the generated emoji.txt file. |         # the generated emoji.txt file. | ||||||
|             DEPENDS "${EMOJI_GENERATOR_PATH}" "${EMOJI_RES_PATH}" "${EMOJI_TEST_PATH}" |         dependencies "${EMOJI_RES_PATH}" "${EMOJI_SERENITY_PATH}" | ||||||
|             USES_TERMINAL |  | ||||||
|     ) |     ) | ||||||
|         add_custom_target(generate_emoji_txt ALL DEPENDS "${EMOJI_INSTALL_PATH}") |  | ||||||
|     endif() |  | ||||||
| 
 | 
 | ||||||
|     set(UNICODE_DATA_SOURCES |     set(UNICODE_DATA_SOURCES | ||||||
|         ${UNICODE_DATA_HEADER} |         ${UNICODE_DATA_HEADER} | ||||||
|  |  | ||||||
|  | @ -11,6 +11,7 @@ | ||||||
| #include <AK/StringUtils.h> | #include <AK/StringUtils.h> | ||||||
| #include <AK/Types.h> | #include <AK/Types.h> | ||||||
| #include <LibCore/ArgsParser.h> | #include <LibCore/ArgsParser.h> | ||||||
|  | #include <LibCore/Directory.h> | ||||||
| #include <LibCore/Stream.h> | #include <LibCore/Stream.h> | ||||||
| #include <LibUnicode/Emoji.h> | #include <LibUnicode/Emoji.h> | ||||||
| 
 | 
 | ||||||
|  | @ -19,10 +20,14 @@ constexpr auto s_string_index_type = "u16"sv; | ||||||
| 
 | 
 | ||||||
| struct Emoji { | struct Emoji { | ||||||
|     StringIndexType name { 0 }; |     StringIndexType name { 0 }; | ||||||
|  |     Optional<String> image_path; | ||||||
|     Unicode::EmojiGroup group; |     Unicode::EmojiGroup group; | ||||||
|  |     String subgroup; | ||||||
|     u32 display_order { 0 }; |     u32 display_order { 0 }; | ||||||
|     String code_points_name; |  | ||||||
|     Vector<u32> code_points; |     Vector<u32> code_points; | ||||||
|  |     String code_points_name; | ||||||
|  |     String encoded_code_points; | ||||||
|  |     String status; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct EmojiData { | struct EmojiData { | ||||||
|  | @ -30,13 +35,32 @@ struct EmojiData { | ||||||
|     Vector<Emoji> emojis; |     Vector<Emoji> emojis; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | static void set_image_path_for_emoji(StringView emoji_resource_path, Emoji& emoji) | ||||||
|  | { | ||||||
|  |     StringBuilder builder; | ||||||
|  | 
 | ||||||
|  |     for (auto code_point : emoji.code_points) { | ||||||
|  |         if (code_point == 0xfe0f) | ||||||
|  |             continue; | ||||||
|  |         if (!builder.is_empty()) | ||||||
|  |             builder.append('_'); | ||||||
|  |         builder.appendff("U+{:X}", code_point); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     auto path = String::formatted("{}/{}.png", emoji_resource_path, builder.build()); | ||||||
|  |     if (Core::Stream::File::exists(path)) | ||||||
|  |         emoji.image_path = move(path); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data) | static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data) | ||||||
| { | { | ||||||
|     static constexpr auto group_header = "# group: "sv; |     static constexpr auto group_header = "# group: "sv; | ||||||
|  |     static constexpr auto subgroup_header = "# subgroup: "sv; | ||||||
| 
 | 
 | ||||||
|     Array<u8, 1024> buffer; |     Array<u8, 1024> buffer; | ||||||
| 
 | 
 | ||||||
|     Unicode::EmojiGroup group; |     Unicode::EmojiGroup group; | ||||||
|  |     String subgroup; | ||||||
|     u32 display_order { 0 }; |     u32 display_order { 0 }; | ||||||
| 
 | 
 | ||||||
|     while (TRY(file.can_read_line())) { |     while (TRY(file.can_read_line())) { | ||||||
|  | @ -48,6 +72,8 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo | ||||||
|             if (line.starts_with(group_header)) { |             if (line.starts_with(group_header)) { | ||||||
|                 auto name = line.substring_view(group_header.length()); |                 auto name = line.substring_view(group_header.length()); | ||||||
|                 group = Unicode::emoji_group_from_string(name); |                 group = Unicode::emoji_group_from_string(name); | ||||||
|  |             } else if (line.starts_with(subgroup_header)) { | ||||||
|  |                 subgroup = line.substring_view(subgroup_header.length()); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             continue; |             continue; | ||||||
|  | @ -61,6 +87,7 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo | ||||||
| 
 | 
 | ||||||
|         Emoji emoji {}; |         Emoji emoji {}; | ||||||
|         emoji.group = group; |         emoji.group = group; | ||||||
|  |         emoji.subgroup = subgroup; | ||||||
|         emoji.display_order = display_order++; |         emoji.display_order = display_order++; | ||||||
| 
 | 
 | ||||||
|         auto code_points = line.substring_view(0, *status_index).split_view(' '); |         auto code_points = line.substring_view(0, *status_index).split_view(' '); | ||||||
|  | @ -81,6 +108,8 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo | ||||||
|         auto name = emoji_and_name.substring_view(emoji_and_name_spaces[2]).trim_whitespace(); |         auto name = emoji_and_name.substring_view(emoji_and_name_spaces[2]).trim_whitespace(); | ||||||
|         emoji.name = emoji_data.unique_strings.ensure(name.to_titlecase_string()); |         emoji.name = emoji_data.unique_strings.ensure(name.to_titlecase_string()); | ||||||
|         emoji.code_points_name = String::join('_', code_points); |         emoji.code_points_name = String::join('_', code_points); | ||||||
|  |         emoji.encoded_code_points = emoji_and_name.substring_view(0, emoji_and_name_spaces[1]).trim_whitespace(); | ||||||
|  |         emoji.status = line.substring_view(*status_index + 1, *emoji_and_name_index - *status_index - 1).trim_whitespace(); | ||||||
| 
 | 
 | ||||||
|         TRY(emoji_data.emojis.try_append(move(emoji))); |         TRY(emoji_data.emojis.try_append(move(emoji))); | ||||||
|     } |     } | ||||||
|  | @ -241,24 +270,74 @@ Optional<Emoji> find_emoji_for_code_points(Span<u32 const> code_points) | ||||||
|     return {}; |     return {}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static ErrorOr<void> generate_emoji_installation(Core::Stream::BufferedFile& file, EmojiData const& emoji_data) | ||||||
|  | { | ||||||
|  |     StringBuilder builder; | ||||||
|  |     SourceGenerator generator { builder }; | ||||||
|  | 
 | ||||||
|  |     auto current_group = Unicode::EmojiGroup::Unknown; | ||||||
|  |     StringView current_subgroup; | ||||||
|  | 
 | ||||||
|  |     for (auto const& emoji : emoji_data.emojis) { | ||||||
|  |         if (!emoji.image_path.has_value()) | ||||||
|  |             continue; | ||||||
|  |         if (emoji.group == Unicode::EmojiGroup::SerenityOS) | ||||||
|  |             continue; // SerenityOS emojis are in emoji-serenity.txt
 | ||||||
|  | 
 | ||||||
|  |         if (current_group != emoji.group) { | ||||||
|  |             if (!builder.is_empty()) | ||||||
|  |                 generator.append("\n"sv); | ||||||
|  | 
 | ||||||
|  |             generator.set("group"sv, Unicode::emoji_group_to_string(emoji.group)); | ||||||
|  |             generator.append("# group: @group@\n"); | ||||||
|  | 
 | ||||||
|  |             current_group = emoji.group; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (current_subgroup != emoji.subgroup) { | ||||||
|  |             generator.set("subgroup"sv, emoji.subgroup); | ||||||
|  |             generator.append("\n# subgroup: @subgroup@\n"); | ||||||
|  | 
 | ||||||
|  |             current_subgroup = emoji.subgroup; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         generator.set("emoji"sv, emoji.encoded_code_points); | ||||||
|  |         generator.set("name"sv, emoji_data.unique_strings.get(emoji.name)); | ||||||
|  |         generator.set("status"sv, emoji.status); | ||||||
|  | 
 | ||||||
|  |         generator.append("@emoji@"sv); | ||||||
|  |         generator.append(" - "sv); | ||||||
|  |         generator.append(String::join(" "sv, emoji.code_points, "U+{:X}"sv)); | ||||||
|  |         generator.append(" @name@ (@status@)\n"sv); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     TRY(file.write(generator.as_string_view().bytes())); | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| ErrorOr<int> serenity_main(Main::Arguments arguments) | ErrorOr<int> serenity_main(Main::Arguments arguments) | ||||||
| { | { | ||||||
|     StringView generated_header_path; |     StringView generated_header_path; | ||||||
|     StringView generated_implementation_path; |     StringView generated_implementation_path; | ||||||
|  |     StringView generated_installation_path; | ||||||
|     StringView emoji_test_path; |     StringView emoji_test_path; | ||||||
|     StringView emoji_serenity_path; |     StringView emoji_serenity_path; | ||||||
|  |     StringView emoji_resource_path; | ||||||
| 
 | 
 | ||||||
|     Core::ArgsParser args_parser; |     Core::ArgsParser args_parser; | ||||||
|     args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path"); |     args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path"); | ||||||
|     args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); |     args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); | ||||||
|  |     args_parser.add_option(generated_installation_path, "Path to the emoji.txt file to generate", "generated-installation-path", 'i', "generated-installation-path"); | ||||||
|     args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path"); |     args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path"); | ||||||
|     args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path"); |     args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path"); | ||||||
|  |     args_parser.add_option(emoji_resource_path, "Path to the /res/emoji directory", "emoji-resource-path", 'r', "emoji-resource-path"); | ||||||
|     args_parser.parse(arguments); |     args_parser.parse(arguments); | ||||||
| 
 | 
 | ||||||
|     auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write)); |     auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write)); | ||||||
|     auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write)); |     auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write)); | ||||||
|     auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read)); |     auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read)); | ||||||
|     auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read)); |     auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read)); | ||||||
|  |     VERIFY(Core::Stream::File::exists(emoji_resource_path)); | ||||||
| 
 | 
 | ||||||
|     EmojiData emoji_data {}; |     EmojiData emoji_data {}; | ||||||
|     TRY(parse_emoji_test_data(*emoji_test_file, emoji_data)); |     TRY(parse_emoji_test_data(*emoji_test_file, emoji_data)); | ||||||
|  | @ -267,5 +346,15 @@ ErrorOr<int> serenity_main(Main::Arguments arguments) | ||||||
|     TRY(generate_emoji_data_header(*generated_header_file, emoji_data)); |     TRY(generate_emoji_data_header(*generated_header_file, emoji_data)); | ||||||
|     TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data)); |     TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data)); | ||||||
| 
 | 
 | ||||||
|  |     if (!generated_installation_path.is_empty()) { | ||||||
|  |         TRY(Core::Directory::create(LexicalPath { generated_installation_path }.parent(), Core::Directory::CreateDirectories::Yes)); | ||||||
|  | 
 | ||||||
|  |         for (auto& emoji : emoji_data.emojis) | ||||||
|  |             set_image_path_for_emoji(emoji_resource_path, emoji); | ||||||
|  | 
 | ||||||
|  |         auto generated_installation_file = TRY(open_file(generated_installation_path, Core::Stream::OpenMode::Write)); | ||||||
|  |         TRY(generate_emoji_installation(*generated_installation_file, emoji_data)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1,77 +0,0 @@ | ||||||
| #!/usr/bin/env bash |  | ||||||
| 
 |  | ||||||
| set -e |  | ||||||
| 
 |  | ||||||
| if [ $# -ne 3 ]; then |  | ||||||
|   echo "Usage: $0 <input emoji-test.txt file> <emoji image directory> <output path>" |  | ||||||
|   exit 1 |  | ||||||
| fi |  | ||||||
| 
 |  | ||||||
| INPUT_FILE="$1" |  | ||||||
| EMOJI_DIR="$2" |  | ||||||
| OUTPUT_PATH="$3" |  | ||||||
| 
 |  | ||||||
| # empty the generated file first |  | ||||||
| :>| "$OUTPUT_PATH" |  | ||||||
| 
 |  | ||||||
| first_heading=true |  | ||||||
| printed_group_header=false |  | ||||||
| printed_subgroup_header=false |  | ||||||
| while IFS= read -r line |  | ||||||
| do |  | ||||||
|     if [[ $line == \#\ group:\ * ]]; then |  | ||||||
|         current_group="$line" |  | ||||||
|         printed_group_header=false |  | ||||||
|     elif [[ $line == \#\ subgroup:\ * ]]; then |  | ||||||
|         current_subgroup="$line" |  | ||||||
|         printed_subgroup_header=false |  | ||||||
|     elif [[ ${#line} -ne 0 && $line != \#* ]]; then |  | ||||||
|         codepoints_string=${line%%;*} |  | ||||||
|         IFS=" " read -r -a codepoints <<< "$codepoints_string" |  | ||||||
|         for i in "${!codepoints[@]}"; do |  | ||||||
|             # strip leading zeros |  | ||||||
|             codepoints[$i]="${codepoints[$i]#"${codepoints[$i]%%[!0]*}"}" |  | ||||||
|             # add U+ prefix |  | ||||||
|             codepoints[$i]="U+${codepoints[$i]}" |  | ||||||
|         done |  | ||||||
| 
 |  | ||||||
|         # when doing a lookup we want to remove all U+FE0F (emoji presentation specifier) codepoints |  | ||||||
|         lookup_filename_parts=() |  | ||||||
|         for codepoint in "${codepoints[@]}"; do |  | ||||||
|             if [[ $codepoint != "U+FE0F" ]]; then |  | ||||||
|                 lookup_filename_parts+=("$codepoint") |  | ||||||
|             fi |  | ||||||
|         done |  | ||||||
| 
 |  | ||||||
|         IFS=_ |  | ||||||
|         lookup_filename="${lookup_filename_parts[*]}.png" |  | ||||||
| 
 |  | ||||||
|         if [ -f "$EMOJI_DIR/$lookup_filename" ]; then |  | ||||||
|             if [ $printed_group_header = false ]; then |  | ||||||
|                 if [ $first_heading = false ]; then |  | ||||||
|                     echo "" >> "$OUTPUT_PATH" |  | ||||||
|                 fi |  | ||||||
|                 echo "$current_group" >> "$OUTPUT_PATH" |  | ||||||
|                 first_heading=false |  | ||||||
|                 printed_group_header=true |  | ||||||
|             fi |  | ||||||
|             if [ $printed_subgroup_header = false ]; then |  | ||||||
|                 echo "" >> "$OUTPUT_PATH" |  | ||||||
|                 echo "$current_subgroup" >> "$OUTPUT_PATH" |  | ||||||
|                 printed_subgroup_header=true |  | ||||||
|             fi |  | ||||||
| 
 |  | ||||||
|             emoji_and_name=${line#*# } |  | ||||||
|             emoji=${emoji_and_name%% E*} |  | ||||||
|             name_with_version=${emoji_and_name#* } |  | ||||||
|             name=${name_with_version#* } |  | ||||||
|             qualification=${line#*; } |  | ||||||
|             qualification=${qualification%%#*} |  | ||||||
|             # remove trailing whitespace characters |  | ||||||
|             qualification="${qualification%"${qualification##*[![:space:]]}"}" |  | ||||||
| 
 |  | ||||||
|             IFS=" " |  | ||||||
|             echo "$emoji - ${codepoints[*]} ${name^^} ($qualification)" >> "$OUTPUT_PATH" |  | ||||||
|         fi |  | ||||||
|     fi |  | ||||||
| done < "$INPUT_FILE" |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn