1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-28 07:55:07 +00:00

LibUnicode: Download and parse the word break property list UCD file

Note that unlike the main property list, each code point has only one
word break property. Code points that do not have a word break property
are to be assigned the property "Other".
This commit is contained in:
Timothy Flynn 2021-07-27 18:24:39 -04:00 committed by Andreas Kling
parent c45a014645
commit 12fb3ae033
2 changed files with 60 additions and 5 deletions

View file

@ -9,6 +9,9 @@ set(SPECIAL_CASING_PATH ${CMAKE_BINARY_DIR}/UCD/SpecialCasing.txt)
# UCD property list: remote location and the local copy kept in the build tree.
set(PROP_LIST_URL "https://www.unicode.org/Public/13.0.0/ucd/PropList.txt")
set(PROP_LIST_PATH "${CMAKE_BINARY_DIR}/UCD/PropList.txt")

# UCD word break property list: remote location and the local copy kept in the build tree.
set(WORD_BREAK_URL "https://www.unicode.org/Public/13.0.0/ucd/auxiliary/WordBreakProperty.txt")
set(WORD_BREAK_PATH "${CMAKE_BINARY_DIR}/UCD/WordBreakProperty.txt")
if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
if (NOT EXISTS ${UNICODE_DATA_PATH})
message(STATUS "Downloading UCD UnicodeData.txt from ${UNICODE_DATA_URL}...")
@ -22,6 +25,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
message(STATUS "Downloading UCD PropList.txt from ${PROP_LIST_URL}...")
file(DOWNLOAD ${PROP_LIST_URL} ${PROP_LIST_PATH} INACTIVITY_TIMEOUT 10)
endif()
# Fetch WordBreakProperty.txt unless a copy is already present at WORD_BREAK_PATH.
if (NOT EXISTS "${WORD_BREAK_PATH}")
    message(STATUS "Downloading UCD WordBreakProperty.txt from ${WORD_BREAK_URL}...")

    # Capture the transfer result: a failed download can leave an empty or partial
    # file behind, which would satisfy the EXISTS check above on the next configure
    # and prevent any retry.
    file(DOWNLOAD "${WORD_BREAK_URL}" "${WORD_BREAK_PATH}" INACTIVITY_TIMEOUT 10 STATUS word_break_download_status)

    # STATUS is a two-element list: numeric code (0 means success) and an error string.
    list(GET word_break_download_status 0 word_break_download_code)
    if (NOT word_break_download_code EQUAL 0)
        list(GET word_break_download_status 1 word_break_download_error)
        # Drop whatever was written so that the next configure attempts the download again.
        file(REMOVE "${WORD_BREAK_PATH}")
        message(FATAL_ERROR "Failed to download ${WORD_BREAK_URL}: ${word_break_download_error}")
    endif()
endif()
set(UNICODE_GENERATOR CodeGenerators/GenerateUnicodeData)
set(UNICODE_DATA_HEADER UnicodeData.h)
@ -39,7 +46,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
add_custom_command(
OUTPUT ${UNICODE_DATA_HEADER}
COMMAND ${write_if_different} ${UNICODE_DATA_HEADER} ${UNICODE_GENERATOR} -h -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH}
COMMAND ${write_if_different} ${UNICODE_DATA_HEADER} ${UNICODE_GENERATOR} -h -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -w ${WORD_BREAK_PATH}
VERBATIM
DEPENDS GenerateUnicodeData
MAIN_DEPENDENCY ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH}
@ -47,7 +54,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
add_custom_command(
OUTPUT ${UNICODE_DATA_IMPLEMENTATION}
COMMAND ${write_if_different} ${UNICODE_DATA_IMPLEMENTATION} ${UNICODE_GENERATOR} -c -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH}
COMMAND ${write_if_different} ${UNICODE_DATA_IMPLEMENTATION} ${UNICODE_GENERATOR} -c -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -w ${WORD_BREAK_PATH}
VERBATIM
DEPENDS GenerateUnicodeData
MAIN_DEPENDENCY ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH}