1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 14:37:46 +00:00

LibUnicode: Parse and generate case folding code point data

Case folding rules have a similar mapping style as special casing rules,
where one code point may map to zero or more case folding rules. These
will be used for case-insensitive string comparisons. To see how case
folding can differ from other casing rules, consider "ß" (U+00DF):

    >>> "ß".lower()
    'ß'

    >>> "ß".upper()
    'SS'

    >>> "ß".title()
    'Ss'

    >>> "ß".casefold()
    'ss'
This commit is contained in:
Timothy Flynn 2023-01-17 08:34:38 -05:00 committed by Linus Groh
parent 9226cf7272
commit 8f2589b3b0
7 changed files with 207 additions and 5 deletions

View file

@ -13,6 +13,9 @@ set(UNICODE_DATA_PATH "${UCD_PATH}/${UNICODE_DATA_SOURCE}")
set(SPECIAL_CASING_SOURCE "SpecialCasing.txt")
set(SPECIAL_CASING_PATH "${UCD_PATH}/${SPECIAL_CASING_SOURCE}")
set(CASE_FOLDING_SOURCE "CaseFolding.txt")
set(CASE_FOLDING_PATH "${UCD_PATH}/${CASE_FOLDING_SOURCE}")
set(DERIVED_GENERAL_CATEGORY_SOURCE "extracted/DerivedGeneralCategory.txt")
set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/${DERIVED_GENERAL_CATEGORY_SOURCE}")
@ -71,6 +74,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
download_file("${UCD_ZIP_URL}" "${UCD_ZIP_PATH}")
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${UNICODE_DATA_SOURCE}" "${UNICODE_DATA_PATH}")
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SPECIAL_CASING_SOURCE}" "${SPECIAL_CASING_PATH}")
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${CASE_FOLDING_SOURCE}" "${CASE_FOLDING_PATH}")
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_GENERAL_CATEGORY_SOURCE}" "${DERIVED_GENERAL_CATEGORY_PATH}")
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_LIST_SOURCE}" "${PROP_LIST_PATH}")
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_CORE_PROP_SOURCE}" "${DERIVED_CORE_PROP_PATH}")
@ -105,7 +109,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
"${UCD_VERSION_FILE}"
"${UNICODE_DATA_HEADER}"
"${UNICODE_DATA_IMPLEMENTATION}"
arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -k "${BLOCKS_PATH}" -e "${EMOJI_DATA_PATH}" -m "${NAME_ALIAS_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}"
arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -o "${CASE_FOLDING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -k "${BLOCKS_PATH}" -e "${EMOJI_DATA_PATH}" -m "${NAME_ALIAS_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}"
)
invoke_generator(
"EmojiData"