diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index c69e905444..15efd02dec 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -109,6 +109,7 @@ struct UnicodeData { u32 largest_casing_transform_size { 0 }; u32 largest_special_casing_size { 0 }; Vector conditions; + Vector locales; Vector code_point_data; @@ -232,8 +233,13 @@ static ErrorOr parse_special_casing(Core::Stream::BufferedFile& file, Unic casing.condition = conditions[0]; } - if (!casing.locale.is_empty()) + if (!casing.locale.is_empty()) { casing.locale = String::formatted("{:c}{}", to_ascii_uppercase(casing.locale[0]), casing.locale.substring_view(1)); + + if (!unicode_data.locales.contains_slow(casing.locale)) + unicode_data.locales.append(casing.locale); + } + casing.condition = casing.condition.replace("_"sv, ""sv, ReplaceMode::All); if (!casing.condition.is_empty() && !unicode_data.conditions.contains_slow(casing.condition)) @@ -686,12 +692,12 @@ enum class @name@ : @underlying@ {)~~~"); #pragma once #include -#include #include namespace Unicode { )~~~"); + generate_enum("Locale"sv, "None"sv, unicode_data.locales); generate_enum("Condition"sv, "None"sv, move(unicode_data.conditions)); generate_enum("GeneralCategory"sv, {}, unicode_data.general_categories.keys(), unicode_data.general_category_aliases); generate_enum("Property"sv, {}, unicode_data.prop_list.keys(), unicode_data.prop_aliases); @@ -714,10 +720,12 @@ struct SpecialCasing { u32 titlecase_mapping[@casing_transform_size@]; u32 titlecase_mapping_size { 0 }; - Locale::Locale locale { Locale::Locale::None }; + Locale locale { Locale::None }; Condition condition { Condition::None }; }; +Optional locale_from_string(StringView locale); + } )~~~"); @@ -780,7 +788,7 @@ static constexpr Array s_special_casing { append_list_and_size(casing.titlecase_mapping, format); generator.set("locale", casing.locale.is_empty() ? "None" : casing.locale); - generator.append(", Locale::Locale::@locale@"); + generator.append(", Locale::@locale@"); generator.set("condition", casing.condition.is_empty() ? "None" : casing.condition); generator.append(", Condition::@condition@"); @@ -1096,18 +1104,29 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@) )~~~"); }; - auto append_from_string = [&](StringView enum_title, StringView enum_snake, PropList const& prop_list, Vector const& aliases) { + auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& prop_list, Vector const& aliases) { HashValueMap hashes; hashes.ensure_capacity(prop_list.size() + aliases.size()); - for (auto const& prop : prop_list) - hashes.set(prop.key.hash(), prop.key); + ValueFromStringOptions options {}; + + for (auto const& prop : prop_list) { + if constexpr (IsSame, String>) { + hashes.set(CaseInsensitiveStringViewTraits::hash(prop), prop); + options.sensitivity = CaseSensitivity::CaseInsensitive; + } else { + hashes.set(prop.key.hash(), prop.key); + } + } + for (auto const& alias : aliases) hashes.set(alias.alias.hash(), alias.alias); - generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes)); + generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes), options); }; + append_from_string("Locale"sv, "locale"sv, unicode_data.locales, {}); + append_prop_search("GeneralCategory"sv, "general_category"sv, "s_general_categories"sv); append_from_string("GeneralCategory"sv, "general_category"sv, unicode_data.general_categories, unicode_data.general_category_aliases); diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp index 9a77b4b4be..f718d0d839 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp +++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #if ENABLE_UNICODE_DATA @@ -163,17 +162,17 @@ static bool is_followed_by_combining_dot_above(Utf8View const& string, size_t in static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View const& string, Optional locale, size_t index, size_t byte_length) { - auto requested_locale = Locale::Locale::None; + auto requested_locale = Locale::None; if (locale.has_value()) { - if (auto maybe_locale = Locale::locale_from_string(*locale); maybe_locale.has_value()) + if (auto maybe_locale = locale_from_string(*locale); maybe_locale.has_value()) requested_locale = *maybe_locale; } auto special_casings = special_case_mapping(code_point); for (auto const* special_casing : special_casings) { - if (special_casing->locale != Locale::Locale::None && special_casing->locale != requested_locale) + if (special_casing->locale != Locale::None && special_casing->locale != requested_locale) continue; switch (special_casing->condition) {