mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 15:07:45 +00:00
LibUnicode: Parse UCD emoji-data.txt and generate Unicode property
This commit is contained in:
parent
484ccfadc3
commit
9113f892a7
3 changed files with 19 additions and 8 deletions
|
@ -275,12 +275,12 @@ bool is_ecma262_property([[maybe_unused]] Property property)
|
|||
case Unicode::Property::Default_Ignorable_Code_Point:
|
||||
case Unicode::Property::Deprecated:
|
||||
case Unicode::Property::Diacritic:
|
||||
// case Unicode::Property::Emoji:
|
||||
// case Unicode::Property::Emoji_Component:
|
||||
// case Unicode::Property::Emoji_Modifier:
|
||||
// case Unicode::Property::Emoji_Modifier_Base:
|
||||
// case Unicode::Property::Emoji_Presentation:
|
||||
// case Unicode::Property::Extended_Pictographic:
|
||||
case Unicode::Property::Emoji:
|
||||
case Unicode::Property::Emoji_Component:
|
||||
case Unicode::Property::Emoji_Modifier:
|
||||
case Unicode::Property::Emoji_Modifier_Base:
|
||||
case Unicode::Property::Emoji_Presentation:
|
||||
case Unicode::Property::Extended_Pictographic:
|
||||
case Unicode::Property::Extender:
|
||||
case Unicode::Property::Grapheme_Base:
|
||||
case Unicode::Property::Grapheme_Extend:
|
||||
|
|
|
@ -828,6 +828,7 @@ int main(int argc, char** argv)
|
|||
char const* scripts_path = nullptr;
|
||||
char const* script_extensions_path = nullptr;
|
||||
char const* word_break_path = nullptr;
|
||||
char const* emoji_data_path = nullptr;
|
||||
|
||||
Core::ArgsParser args_parser;
|
||||
args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
|
||||
|
@ -841,6 +842,7 @@ int main(int argc, char** argv)
|
|||
args_parser.add_option(scripts_path, "Path to Scripts.txt file", "scripts-path", 'r', "scripts-path");
|
||||
args_parser.add_option(script_extensions_path, "Path to ScriptExtensions.txt file", "script-extensions-path", 'x', "script-extensions-path");
|
||||
args_parser.add_option(word_break_path, "Path to WordBreakProperty.txt file", "word-break-path", 'w', "word-break-path");
|
||||
args_parser.add_option(emoji_data_path, "Path to emoji-data.txt file", "emoji-data-path", 'e', "emoji-data-path");
|
||||
args_parser.parse(argc, argv);
|
||||
|
||||
auto open_file = [&](StringView path, StringView flags, Core::OpenMode mode = Core::OpenMode::ReadOnly) {
|
||||
|
@ -870,11 +872,13 @@ int main(int argc, char** argv)
|
|||
auto scripts_file = open_file(scripts_path, "-r/--scripts-path");
|
||||
auto script_extensions_file = open_file(script_extensions_path, "-x/--script-extensions-path");
|
||||
auto word_break_file = open_file(word_break_path, "-w/--word-break-path");
|
||||
auto emoji_data_file = open_file(emoji_data_path, "-e/--emoji-data-path");
|
||||
|
||||
UnicodeData unicode_data {};
|
||||
parse_special_casing(special_casing_file, unicode_data);
|
||||
parse_prop_list(prop_list_file, unicode_data.prop_list);
|
||||
parse_prop_list(derived_core_prop_file, unicode_data.prop_list);
|
||||
parse_prop_list(emoji_data_file, unicode_data.prop_list);
|
||||
parse_alias_list(prop_alias_file, unicode_data.prop_list, unicode_data.prop_aliases);
|
||||
parse_prop_list(scripts_file, unicode_data.script_list);
|
||||
parse_prop_list(script_extensions_file, unicode_data.script_extensions, true);
|
||||
|
|
|
@ -27,6 +27,9 @@ set(SCRIPT_EXTENSIONS_PATH ${CMAKE_BINARY_DIR}/UCD/ScriptExtensions.txt)
|
|||
set(WORD_BREAK_URL https://www.unicode.org/Public/13.0.0/ucd/auxiliary/WordBreakProperty.txt)
|
||||
set(WORD_BREAK_PATH ${CMAKE_BINARY_DIR}/UCD/WordBreakProperty.txt)
|
||||
|
||||
set(EMOJI_DATA_URL https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt)
|
||||
set(EMOJI_DATA_PATH ${CMAKE_BINARY_DIR}/UCD/emoji-data.txt)
|
||||
|
||||
if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
|
||||
if (NOT EXISTS ${UNICODE_DATA_PATH})
|
||||
message(STATUS "Downloading UCD UnicodeData.txt from ${UNICODE_DATA_URL}...")
|
||||
|
@ -64,6 +67,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
|
|||
message(STATUS "Downloading UCD WordBreakProperty.txt from ${WORD_BREAK_URL}...")
|
||||
file(DOWNLOAD ${WORD_BREAK_URL} ${WORD_BREAK_PATH} INACTIVITY_TIMEOUT 10)
|
||||
endif()
|
||||
if (NOT EXISTS ${EMOJI_DATA_PATH})
|
||||
message(STATUS "Downloading UCD emoji-data.txt from ${EMOJI_DATA_URL}...")
|
||||
file(DOWNLOAD ${EMOJI_DATA_URL} ${EMOJI_DATA_PATH} INACTIVITY_TIMEOUT 10)
|
||||
endif()
|
||||
|
||||
set(UNICODE_DATA_HEADER LibUnicode/UnicodeData.h)
|
||||
set(UNICODE_DATA_IMPLEMENTATION LibUnicode/UnicodeData.cpp)
|
||||
|
@ -75,9 +82,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
|
|||
|
||||
add_custom_command(
|
||||
OUTPUT ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION}
|
||||
COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -x ${SCRIPT_EXTENSIONS_PATH} -w ${WORD_BREAK_PATH}
|
||||
COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -x ${SCRIPT_EXTENSIONS_PATH} -w ${WORD_BREAK_PATH} -e ${EMOJI_DATA_PATH}
|
||||
VERBATIM
|
||||
DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${SCRIPT_EXTENSIONS_PATH} ${WORD_BREAK_PATH}
|
||||
DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${SCRIPT_EXTENSIONS_PATH} ${WORD_BREAK_PATH} ${EMOJI_DATA_PATH}
|
||||
)
|
||||
|
||||
set(UNICODE_DATA_SOURCES ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue