1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-06-28 16:42:12 +00:00

LibUnicode: Parse and generate case folding code point data

Case folding rules use a mapping style similar to that of special casing
rules, where one code point may map to zero or more case folding rules.
These will be used for case-insensitive string comparisons. To see how case
folding can differ from other casing rules, consider "ß" (U+00DF):

    >>> "ß".lower()
    'ß'

    >>> "ß".upper()
    'SS'

    >>> "ß".title()
    'Ss'

    >>> "ß".casefold()
    'ss'
This commit is contained in:
Timothy Flynn 2023-01-17 08:34:38 -05:00 committed by Linus Groh
parent 9226cf7272
commit 8f2589b3b0
7 changed files with 207 additions and 5 deletions

View file

@ -195,6 +195,19 @@ static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View
return nullptr;
}
// Scans the case folding entries that case_folding_mapping() yields for `code_point` and returns
// the first one whose status equals any of the statuses listed in `StatusFilter`. Returns nullptr
// when no entry matches.
template<CaseFoldingStatus... StatusFilter>
static CaseFolding const* find_matching_case_folding(u32 code_point)
{
    for (auto const* candidate : case_folding_mapping(code_point)) {
        // Fold expression: true as soon as the candidate's status equals one of the filters.
        bool const status_is_accepted = ((candidate->status == StatusFilter) || ...);

        if (status_is_accepted)
            return candidate;
    }

    return nullptr;
}
#endif
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
@ -314,4 +327,32 @@ ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[ma
#endif
}
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
ErrorOr<void> build_casefold_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder)
{
#if ENABLE_UNICODE_DATA
    // toCasefold(X): Map each character C in X to Case_Folding(C).
    //
    // Case_Folding(C) uses the mappings with the status field value "C" or "F" in the data file
    // CaseFolding.txt in the Unicode Character Database.
    using enum CaseFoldingStatus;

    for (auto code_point : code_points) {
        if (auto const* folding = find_matching_case_folding<Common, Full>(code_point); folding != nullptr) {
            // A single code point may fold to several code points; append each one in order.
            for (size_t index = 0; index < folding->mapping_size; ++index)
                TRY(builder.try_append_code_point(folding->mapping[index]));
        } else {
            // No Common/Full folding exists for this code point, so it is appended unchanged.
            TRY(builder.try_append_code_point(code_point));
        }
    }

    return {};
#else
    return Error::from_string_literal("Unicode data has been disabled");
#endif
}
}