mirror of
https://github.com/RGBCube/serenity
synced 2025-05-30 22:48:11 +00:00
LibUnicode: Parse UCD Scripts.txt and generate as a Unicode property
There are a couple of minor nuances with parsing script values, compared to other properties. In Scripts.txt, the UCD file lists the full name of each script; other properties, like General Category, list the shorter name in their primary files. This means that the aliases listed in PropertyValueAliases.txt are reversed for script values.
This commit is contained in:
parent
619c924042
commit
f5c1bbc00b
5 changed files with 112 additions and 29 deletions
|
@ -18,6 +18,9 @@ set(PROP_ALIAS_PATH ${CMAKE_BINARY_DIR}/UCD/PropertyAliases.txt)
|
|||
set(PROP_VALUE_ALIAS_URL https://www.unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt)
|
||||
set(PROP_VALUE_ALIAS_PATH ${CMAKE_BINARY_DIR}/UCD/PropertyValueAliases.txt)
|
||||
|
||||
set(SCRIPTS_URL https://www.unicode.org/Public/13.0.0/ucd/Scripts.txt)
|
||||
set(SCRIPTS_PATH ${CMAKE_BINARY_DIR}/UCD/Scripts.txt)
|
||||
|
||||
set(WORD_BREAK_URL https://www.unicode.org/Public/13.0.0/ucd/auxiliary/WordBreakProperty.txt)
|
||||
set(WORD_BREAK_PATH ${CMAKE_BINARY_DIR}/UCD/WordBreakProperty.txt)
|
||||
|
||||
|
@ -46,6 +49,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
|
|||
message(STATUS "Downloading UCD PropertyValueAliases.txt from ${PROP_VALUE_ALIAS_URL}...")
|
||||
file(DOWNLOAD ${PROP_VALUE_ALIAS_URL} ${PROP_VALUE_ALIAS_PATH} INACTIVITY_TIMEOUT 10)
|
||||
endif()
|
||||
if (NOT EXISTS ${SCRIPTS_PATH})
|
||||
message(STATUS "Downloading UCD Scripts.txt from ${SCRIPTS_URL}...")
|
||||
file(DOWNLOAD ${SCRIPTS_URL} ${SCRIPTS_PATH} INACTIVITY_TIMEOUT 10)
|
||||
endif()
|
||||
if (NOT EXISTS ${WORD_BREAK_PATH})
|
||||
message(STATUS "Downloading UCD WordBreakProperty.txt from ${WORD_BREAK_URL}...")
|
||||
file(DOWNLOAD ${WORD_BREAK_URL} ${WORD_BREAK_PATH} INACTIVITY_TIMEOUT 10)
|
||||
|
@ -61,9 +68,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
|
|||
|
||||
add_custom_command(
|
||||
OUTPUT ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION}
|
||||
COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -w ${WORD_BREAK_PATH}
|
||||
COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -w ${WORD_BREAK_PATH}
|
||||
VERBATIM
|
||||
DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${WORD_BREAK_PATH}
|
||||
DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${WORD_BREAK_PATH}
|
||||
)
|
||||
|
||||
set(UNICODE_DATA_SOURCES ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue