mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 19:07:34 +00:00
LibUnicode: Parse and generate case folding code point data
Case folding rules have a mapping style similar to that of special casing rules, where one code point may map to zero or more case folding rules. These will be used for case-insensitive string comparisons. To see how case folding can differ from other casing rules, consider "ß" (U+00DF) in Python:
>>> "ß".lower()
'ß'
>>> "ß".upper()
'SS'
>>> "ß".title()
'Ss'
>>> "ß".casefold()
'ss'
This commit is contained in:
parent
9226cf7272
commit
8f2589b3b0
7 changed files with 207 additions and 5 deletions
|
@ -61,6 +61,13 @@ ErrorOr<String> to_unicode_titlecase_full(StringView string, Optional<StringView
|
|||
return builder.to_string();
|
||||
}
|
||||
|
||||
// Produces the full case-folded form of `string`.
// The input is viewed as UTF-8 and handed to Detail::build_casefold_string(), which writes the
// folded code points into a StringBuilder; any error from that step is propagated to the caller.
ErrorOr<String> to_unicode_casefold_full(StringView string)
{
    Utf8View const code_points { string };

    StringBuilder folded;
    TRY(Detail::build_casefold_string(code_points, folded));

    return folded.to_string();
}
|
||||
|
||||
// Weak default definition: yields an empty Optional for every name.
// NOTE(review): presumably superseded by a strong definition when generated Unicode data is linked in - confirm.
Optional<GeneralCategory> __attribute__((weak)) general_category_from_string(StringView)
{
    return {};
}
|
||||
// Weak default definition: reports false for every code point / category pair.
// NOTE(review): presumably superseded by a strong definition when generated Unicode data is linked in - confirm.
bool __attribute__((weak)) code_point_has_general_category(u32, GeneralCategory)
{
    return false;
}
|
||||
// Weak default definition: yields an empty Optional for every name.
// NOTE(review): presumably superseded by a strong definition when generated Unicode data is linked in - confirm.
Optional<Property> __attribute__((weak)) property_from_string(StringView)
{
    return {};
}
|
||||
|
|
|
@ -44,6 +44,7 @@ u32 to_unicode_titlecase(u32 code_point);
|
|||
ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView, Optional<StringView> const& locale = {});
|
||||
ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView, Optional<StringView> const& locale = {});
|
||||
ErrorOr<String> to_unicode_titlecase_full(StringView, Optional<StringView> const& locale = {});
|
||||
ErrorOr<String> to_unicode_casefold_full(StringView);
|
||||
|
||||
Optional<GeneralCategory> general_category_from_string(StringView);
|
||||
bool code_point_has_general_category(u32 code_point, GeneralCategory general_category);
|
||||
|
|
|
@ -195,6 +195,19 @@ static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
template<CaseFoldingStatus... StatusFilter>
|
||||
static CaseFolding const* find_matching_case_folding(u32 code_point)
|
||||
{
|
||||
auto case_foldings = case_folding_mapping(code_point);
|
||||
|
||||
for (auto const* case_folding : case_foldings) {
|
||||
if (((case_folding->status == StatusFilter) || ...))
|
||||
return case_folding;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
|
||||
|
@ -314,4 +327,32 @@ ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[ma
|
|||
#endif
|
||||
}
|
||||
|
||||
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
|
||||
// Appends the case-folded form of `code_points` to `builder`.
// When Unicode data is compiled out (ENABLE_UNICODE_DATA is false), an error is returned instead.
ErrorOr<void> build_casefold_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder)
{
#if ENABLE_UNICODE_DATA
    // toCasefold(X) replaces every character C of X with Case_Folding(C).
    //
    // Case_Folding(C) is defined by the CaseFolding.txt entries of the Unicode Character Database
    // whose status field is "C" or "F".

    using enum CaseFoldingStatus;

    for (auto code_point : code_points) {
        if (auto const* folding = find_matching_case_folding<Common, Full>(code_point)) {
            // A rule applies: emit each code point of its mapping, in order.
            for (size_t index = 0; index < folding->mapping_size; ++index)
                TRY(builder.try_append_code_point(folding->mapping[index]));
        } else {
            // No rule applies: the code point is appended unchanged.
            TRY(builder.try_append_code_point(code_point));
        }
    }

    return {};
#else
    return Error::from_string_literal("Unicode data has been disabled");
#endif
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,5 +17,6 @@ namespace Unicode::Detail {
|
|||
ErrorOr<void> build_lowercase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale);
|
||||
ErrorOr<void> build_uppercase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale);
|
||||
ErrorOr<void> build_titlecase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale);
|
||||
ErrorOr<void> build_casefold_string(Utf8View code_points, StringBuilder& builder);
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue