1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 01:07:35 +00:00

Meta: Generate emoji.txt at build time from Unicode's emoji-test.txt

Instead of manually updating emoji.txt whenever new emoji are added,
we use Unicode's emoji-test.txt to generate emoji.txt on each build,
including only the emojis that Serenity supports at that time.

By using emoji-test.txt, we can also include all forms of each emoji
(fully-qualified, minimally-qualified, and unqualified) which can be
helpful when double-checking how certain forms are handled.
This commit is contained in:
Ryan Liptak 2022-08-20 02:20:22 -07:00 committed by Linus Groh
parent 8f4317e207
commit 221d9089e9
5 changed files with 86 additions and 509 deletions

View file

@ -0,0 +1,23 @@
# Generates Base/home/anon/Documents/emoji.txt at build time from Unicode's emoji-test.txt.
#
# When ENABLE_EMOJI_TXT_GENERATION is ON:
#   1. emoji-test.txt is downloaded into the build directory at configure time (once).
#   2. A build rule runs Meta/generate-emoji-txt.sh with the downloaded file, the emoji
#      image directory and the output path as its three arguments.
#   3. The `generate_emoji_txt` target is added to the default build so the rule runs.
option(ENABLE_EMOJI_TXT_GENERATION "Enable download of emoji-test.txt and generation of emoji.txt at build time" ON)

set(EMOJI_TEST_TXT_PATH ${CMAKE_BINARY_DIR}/emoji-test.txt)
set(EMOJI_TEST_TXT_URL https://unicode.org/Public/emoji/14.0/emoji-test.txt)

if(ENABLE_EMOJI_TXT_GENERATION)
    if(NOT EXISTS "${EMOJI_TEST_TXT_PATH}")
        file(DOWNLOAD "${EMOJI_TEST_TXT_URL}" "${EMOJI_TEST_TXT_PATH}" STATUS emoji_test_txt_download_status)
        # STATUS is a two-element list: a numeric code (0 means success) and a message.
        list(GET emoji_test_txt_download_status 0 emoji_test_txt_download_code)
        list(GET emoji_test_txt_download_status 1 emoji_test_txt_download_message)
        if(NOT emoji_test_txt_download_code EQUAL 0)
            # A failed download may leave an empty or partial file behind. Remove it so the
            # EXISTS check above does not skip the download on the next configure run.
            file(REMOVE "${EMOJI_TEST_TXT_PATH}")
            message(FATAL_ERROR
                "Failed to download ${EMOJI_TEST_TXT_URL}: ${emoji_test_txt_download_message}. "
                "Retry, or configure with -DENABLE_EMOJI_TXT_GENERATION=OFF to skip emoji.txt generation.")
        endif()
    endif()

    set(EMOJI_RES_PATH "${SerenityOS_SOURCE_DIR}/Base/res/emoji")
    set(EMOJI_TXT_INSTALL_PATH "${SerenityOS_SOURCE_DIR}/Base/home/anon/Documents/emoji.txt")
    set(EMOJI_TXT_GENERATOR "${SerenityOS_SOURCE_DIR}/Meta/generate-emoji-txt.sh")

    add_custom_command(
        OUTPUT "${EMOJI_TXT_INSTALL_PATH}"
        COMMAND "${EMOJI_TXT_GENERATOR}" "${EMOJI_TEST_TXT_PATH}" "${EMOJI_RES_PATH}" "${EMOJI_TXT_INSTALL_PATH}"
        # This will make this command only run when the modified time of the directory changes,
        # which only happens if files within it are added or deleted, but not when a file is modified.
        # This is fine for this use-case, because the contents of a file changing should not affect
        # the generated emoji.txt file.
        MAIN_DEPENDENCY "${EMOJI_RES_PATH}"
        # Regenerate emoji.txt when the generator script itself is edited.
        DEPENDS "${EMOJI_TXT_GENERATOR}"
        USES_TERMINAL
        # Pass the arguments to the script exactly as written, independent of the platform shell.
        VERBATIM
    )
    add_custom_target(generate_emoji_txt ALL DEPENDS "${EMOJI_TXT_INSTALL_PATH}")
endif()

61
Meta/generate-emoji-txt.sh Executable file
View file

@ -0,0 +1,61 @@
#!/usr/bin/env bash
#
# Generates an emoji list from Unicode's emoji-test.txt, restricted to the emoji
# that have an image in the given directory.
#
# Usage: generate-emoji-txt.sh <input emoji-test.txt file> <emoji image directory> <output path>
#
# For every non-comment line of the input, the codepoints before the first ';' are
# turned into "U+XXXX" form (leading zeros stripped). The line is kept only if
# "<emoji image directory>/<codepoints joined by '_', without U+FE0F>.png" exists.
# Kept lines are written as:
#     <emoji> - <U+ codepoints> <NAME IN UPPERCASE> (<qualification>)
# "# group:" and "# subgroup:" comment lines are copied through as headings, with
# a blank line before every heading except the first.
#
# The parsing assumes emoji-test.txt data lines look like:
#     <codepoints> ; <qualification> # <emoji> E<version> <name>

set -e

if [ $# -ne 3 ]; then
    echo "Usage: $0 <input emoji-test.txt file> <emoji image directory> <output path>" >&2
    exit 1
fi

INPUT_FILE="$1"
EMOJI_DIR="$2"
OUTPUT_PATH="$3"

# Validate the input before touching the output, so a missing input file does
# not wipe a previously generated output file.
if [ ! -f "$INPUT_FILE" ]; then
    echo "$0: input file not found: $INPUT_FILE" >&2
    exit 1
fi

first_heading=true

# Everything echoed inside the loop goes to the output file, which is opened
# (and truncated) exactly once by the redirection after 'done'.
# The '|| [[ -n $line ]]' part keeps a final line that lacks a trailing newline.
while IFS= read -r line || [[ -n $line ]]
do
    if [[ $line == \#\ subgroup:\ * || $line == \#\ group:\ * ]]; then
        if [ "$first_heading" = false ]; then
            echo ""
        fi
        echo "$line"
        first_heading=false
    elif [[ ${#line} -ne 0 && $line != \#* ]]; then
        codepoints_string=${line%%;*}
        # IFS is overridden for this single 'read' only; the global IFS is never modified.
        IFS=" " read -r -a codepoints <<< "$codepoints_string"

        display_codepoints=""
        lookup_filename=""
        for raw_codepoint in "${codepoints[@]}"; do
            # strip leading zeros and add the U+ prefix
            codepoint="U+${raw_codepoint#"${raw_codepoint%%[!0]*}"}"

            # space-separated list used in the output line
            display_codepoints+="${display_codepoints:+ }${codepoint}"

            # when doing a lookup we want to remove all U+FE0F (emoji presentation specifier) codepoints
            if [[ $codepoint != "U+FE0F" ]]; then
                lookup_filename+="${lookup_filename:+_}${codepoint}"
            fi
        done
        lookup_filename+=".png"

        if [ -f "$EMOJI_DIR/$lookup_filename" ]; then
            # text after the first "# " is "<emoji> E<version> <name>"
            emoji_and_name=${line#*# }
            emoji=${emoji_and_name%% E*}
            name_with_version=${emoji_and_name#* }
            name=${name_with_version#* }

            # text between the first "; " and the first '#' is the qualification
            qualification=${line#*; }
            qualification=${qualification%%#*}
            # remove trailing whitespace characters
            qualification="${qualification%"${qualification##*[![:space:]]}"}"

            echo "$emoji - $display_codepoints ${name^^} ($qualification)"
        fi
    fi
done < "$INPUT_FILE" >| "$OUTPUT_PATH"