1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 04:28:13 +00:00

LibUnicode: Parse UCD DerivedBinaryProperties.txt and generate property

This commit is contained in:
Timothy Flynn 2021-08-04 07:53:24 -04:00 committed by Linus Groh
parent 9113f892a7
commit 6f2640d031
3 changed files with 14 additions and 3 deletions

View file

@ -262,7 +262,7 @@ bool is_ecma262_property([[maybe_unused]] Property property)
case Unicode::Property::Any:
case Unicode::Property::Assigned:
case Unicode::Property::Bidi_Control:
// case Unicode::Property::Bidi_Mirrored:
case Unicode::Property::Bidi_Mirrored:
case Unicode::Property::Case_Ignorable:
case Unicode::Property::Cased:
case Unicode::Property::Changes_When_Casefolded:

View file

@ -823,6 +823,7 @@ int main(int argc, char** argv)
char const* special_casing_path = nullptr;
char const* prop_list_path = nullptr;
char const* derived_core_prop_path = nullptr;
char const* derived_binary_prop_path = nullptr;
char const* prop_alias_path = nullptr;
char const* prop_value_alias_path = nullptr;
char const* scripts_path = nullptr;
@ -837,6 +838,7 @@ int main(int argc, char** argv)
args_parser.add_option(special_casing_path, "Path to SpecialCasing.txt file", "special-casing-path", 's', "special-casing-path");
args_parser.add_option(prop_list_path, "Path to PropList.txt file", "prop-list-path", 'p', "prop-list-path");
args_parser.add_option(derived_core_prop_path, "Path to DerivedCoreProperties.txt file", "derived-core-prop-path", 'd', "derived-core-prop-path");
args_parser.add_option(derived_binary_prop_path, "Path to DerivedBinaryProperties.txt file", "derived-binary-prop-path", 'b', "derived-binary-prop-path");
args_parser.add_option(prop_alias_path, "Path to PropertyAliases.txt file", "prop-alias-path", 'a', "prop-alias-path");
args_parser.add_option(prop_value_alias_path, "Path to PropertyValueAliases.txt file", "prop-value-alias-path", 'v', "prop-value-alias-path");
args_parser.add_option(scripts_path, "Path to Scripts.txt file", "scripts-path", 'r', "scripts-path");
@ -867,6 +869,7 @@ int main(int argc, char** argv)
auto special_casing_file = open_file(special_casing_path, "-s/--special-casing-path");
auto prop_list_file = open_file(prop_list_path, "-p/--prop-list-path");
auto derived_core_prop_file = open_file(derived_core_prop_path, "-d/--derived-core-prop-path");
auto derived_binary_prop_file = open_file(derived_binary_prop_path, "-b/--derived-binary-prop-path");
auto prop_alias_file = open_file(prop_alias_path, "-a/--prop-alias-path");
auto prop_value_alias_file = open_file(prop_value_alias_path, "-v/--prop-value-alias-path");
auto scripts_file = open_file(scripts_path, "-r/--scripts-path");
@ -878,6 +881,7 @@ int main(int argc, char** argv)
parse_special_casing(special_casing_file, unicode_data);
parse_prop_list(prop_list_file, unicode_data.prop_list);
parse_prop_list(derived_core_prop_file, unicode_data.prop_list);
parse_prop_list(derived_binary_prop_file, unicode_data.prop_list);
parse_prop_list(emoji_data_file, unicode_data.prop_list);
parse_alias_list(prop_alias_file, unicode_data.prop_list, unicode_data.prop_aliases);
parse_prop_list(scripts_file, unicode_data.script_list);

View file

@ -12,6 +12,9 @@ set(PROP_LIST_PATH ${CMAKE_BINARY_DIR}/UCD/PropList.txt)
set(DERIVED_CORE_PROP_URL https://www.unicode.org/Public/13.0.0/ucd/DerivedCoreProperties.txt)
set(DERIVED_CORE_PROP_PATH ${CMAKE_BINARY_DIR}/UCD/DerivedCoreProperties.txt)
set(DERIVED_BINARY_PROP_URL https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedBinaryProperties.txt)
set(DERIVED_BINARY_PROP_PATH ${CMAKE_BINARY_DIR}/UCD/DerivedBinaryProperties.txt)
set(PROP_ALIAS_URL https://www.unicode.org/Public/13.0.0/ucd/PropertyAliases.txt)
set(PROP_ALIAS_PATH ${CMAKE_BINARY_DIR}/UCD/PropertyAliases.txt)
@ -47,6 +50,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
message(STATUS "Downloading UCD DerivedCoreProperties.txt from ${DERIVED_CORE_PROP_URL}...")
file(DOWNLOAD ${DERIVED_CORE_PROP_URL} ${DERIVED_CORE_PROP_PATH} INACTIVITY_TIMEOUT 10)
endif()
if (NOT EXISTS ${DERIVED_BINARY_PROP_PATH})
message(STATUS "Downloading UCD DerivedBinaryProperties.txt from ${DERIVED_BINARY_PROP_URL}...")
file(DOWNLOAD ${DERIVED_BINARY_PROP_URL} ${DERIVED_BINARY_PROP_PATH} INACTIVITY_TIMEOUT 10)
endif()
if (NOT EXISTS ${PROP_ALIAS_PATH})
message(STATUS "Downloading UCD PropertyAliases.txt from ${PROP_ALIAS_URL}...")
file(DOWNLOAD ${PROP_ALIAS_URL} ${PROP_ALIAS_PATH} INACTIVITY_TIMEOUT 10)
@ -82,9 +89,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
add_custom_command(
OUTPUT ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION}
COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -x ${SCRIPT_EXTENSIONS_PATH} -w ${WORD_BREAK_PATH} -e ${EMOJI_DATA_PATH}
COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -b ${DERIVED_BINARY_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -x ${SCRIPT_EXTENSIONS_PATH} -w ${WORD_BREAK_PATH} -e ${EMOJI_DATA_PATH}
VERBATIM
DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${SCRIPT_EXTENSIONS_PATH} ${WORD_BREAK_PATH} ${EMOJI_DATA_PATH}
DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${DERIVED_BINARY_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${SCRIPT_EXTENSIONS_PATH} ${WORD_BREAK_PATH} ${EMOJI_DATA_PATH}
)
set(UNICODE_DATA_SOURCES ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION})