From f082b6ae48820f3660a3da26ec3c58b6d3416dd4 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Fri, 2 Sep 2022 12:16:00 -0400 Subject: [PATCH] LibUnicode: Generate a separate Locale enumeration for special casing The UCD only cares about a few locales for special casing rules (az, lt, and tr). Unfortunately, LibUnicode cannot use LibLocale once the libraries are separate because LibLocale will need to use LibUnicode for many more things; thus there would be a circular dependency. Instead, just generate the small enum needed for this one use case. --- .../LibUnicode/GenerateUnicodeData.cpp | 35 ++++++++++++++----- .../Libraries/LibUnicode/CharacterTypes.cpp | 7 ++-- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index c69e905444..15efd02dec 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -109,6 +109,7 @@ struct UnicodeData { u32 largest_casing_transform_size { 0 }; u32 largest_special_casing_size { 0 }; Vector conditions; + Vector locales; Vector code_point_data; @@ -232,8 +233,13 @@ static ErrorOr parse_special_casing(Core::Stream::BufferedFile& file, Unic casing.condition = conditions[0]; } - if (!casing.locale.is_empty()) + if (!casing.locale.is_empty()) { casing.locale = String::formatted("{:c}{}", to_ascii_uppercase(casing.locale[0]), casing.locale.substring_view(1)); + + if (!unicode_data.locales.contains_slow(casing.locale)) + unicode_data.locales.append(casing.locale); + } + casing.condition = casing.condition.replace("_"sv, ""sv, ReplaceMode::All); if (!casing.condition.is_empty() && !unicode_data.conditions.contains_slow(casing.condition)) @@ -686,12 +692,12 @@ enum class @name@ : @underlying@ {)~~~"); #pragma once #include -#include #include namespace Unicode { )~~~"); + generate_enum("Locale"sv, "None"sv, unicode_data.locales); generate_enum("Condition"sv, "None"sv, move(unicode_data.conditions)); generate_enum("GeneralCategory"sv, {}, unicode_data.general_categories.keys(), unicode_data.general_category_aliases); generate_enum("Property"sv, {}, unicode_data.prop_list.keys(), unicode_data.prop_aliases); @@ -714,10 +720,12 @@ struct SpecialCasing { u32 titlecase_mapping[@casing_transform_size@]; u32 titlecase_mapping_size { 0 }; - Locale::Locale locale { Locale::Locale::None }; + Locale locale { Locale::None }; Condition condition { Condition::None }; }; +Optional locale_from_string(StringView locale); + } )~~~"); @@ -780,7 +788,7 @@ static constexpr Array s_special_casing { append_list_and_size(casing.titlecase_mapping, format); generator.set("locale", casing.locale.is_empty() ? "None" : casing.locale); - generator.append(", Locale::Locale::@locale@"); + generator.append(", Locale::@locale@"); generator.set("condition", casing.condition.is_empty() ? "None" : casing.condition); generator.append(", Condition::@condition@"); @@ -1096,18 +1104,29 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@) )~~~"); }; - auto append_from_string = [&](StringView enum_title, StringView enum_snake, PropList const& prop_list, Vector const& aliases) { + auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& prop_list, Vector const& aliases) { HashValueMap hashes; hashes.ensure_capacity(prop_list.size() + aliases.size()); - for (auto const& prop : prop_list) - hashes.set(prop.key.hash(), prop.key); + ValueFromStringOptions options {}; + + for (auto const& prop : prop_list) { + if constexpr (IsSame, String>) { + hashes.set(CaseInsensitiveStringViewTraits::hash(prop), prop); + options.sensitivity = CaseSensitivity::CaseInsensitive; + } else { + hashes.set(prop.key.hash(), prop.key); + } + } + for (auto const& alias : aliases) hashes.set(alias.alias.hash(), alias.alias); - generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes)); + generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes), options); }; + append_from_string("Locale"sv, "locale"sv, unicode_data.locales, {}); + append_prop_search("GeneralCategory"sv, "general_category"sv, "s_general_categories"sv); append_from_string("GeneralCategory"sv, "general_category"sv, unicode_data.general_categories, unicode_data.general_category_aliases); diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp index 9a77b4b4be..f718d0d839 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp +++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #if ENABLE_UNICODE_DATA @@ -163,17 +162,17 @@ static bool is_followed_by_combining_dot_above(Utf8View const& string, size_t in static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View const& string, Optional locale, size_t index, size_t byte_length) { - auto requested_locale = Locale::Locale::None; + auto requested_locale = Locale::None; if (locale.has_value()) { - if (auto maybe_locale = Locale::locale_from_string(*locale); maybe_locale.has_value()) + if (auto maybe_locale = locale_from_string(*locale); maybe_locale.has_value()) requested_locale = *maybe_locale; } auto special_casings = special_case_mapping(code_point); for (auto const* special_casing : special_casings) { - if (special_casing->locale != Locale::Locale::None && special_casing->locale != requested_locale) + if (special_casing->locale != Locale::None && special_casing->locale != requested_locale) continue; switch (special_casing->condition) {