From 9b118f1f0616108e2cdcf5ce52f03f862e92e508 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Mon, 30 Aug 2021 14:56:23 -0400 Subject: [PATCH] LibUnicode: Generate Unicode locale alias data CLDR contains a set of aliases for languages, territories, etc. that no longer are meant to be used (e.g. due to deprecation). For example, the language "aam" is deprecated and should be canonicalized as "aas". --- .../LibUnicode/GenerateUnicodeLocale.cpp | 83 ++++++++++++++++++- Userland/Libraries/LibUnicode/Locale.cpp | 45 ++++++++++ Userland/Libraries/LibUnicode/Locale.h | 6 ++ 3 files changed, 132 insertions(+), 2 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp index 651581b90b..20fd806cc4 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp @@ -37,6 +37,11 @@ struct UnicodeLocaleData { Vector scripts; Vector variants; Vector currencies; + HashMap language_aliases; + HashMap territory_aliases; + HashMap script_aliases; + HashMap variant_aliases; + HashMap subdivision_aliases; }; static void write_to_file_if_different(Core::File& file, StringView contents) @@ -51,6 +56,36 @@ static void write_to_file_if_different(Core::File& file, StringView contents) VERIFY(file.write(contents)); } +static void parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data) +{ + LexicalPath core_aliases_path(move(core_supplemental_path)); + core_aliases_path = core_aliases_path.append("aliases.json"sv); + VERIFY(Core::File::exists(core_aliases_path.string())); + + auto core_aliases_file_or_error = Core::File::open(core_aliases_path.string(), Core::OpenMode::ReadOnly); + VERIFY(!core_aliases_file_or_error.is_error()); + + auto core_aliases = JsonParser(core_aliases_file_or_error.value()->read_all()).parse(); + VERIFY(core_aliases.has_value()); + + auto const& supplemental_object = core_aliases->as_object().get("supplemental"sv); + auto const& metadata_object = supplemental_object.as_object().get("metadata"sv); + auto const& alias_object = metadata_object.as_object().get("alias"sv); + + auto append_aliases = [](auto& alias_object, auto& alias_map) { + alias_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { + auto alias = value.as_object().get("_replacement"sv).as_string(); + alias_map.set(key, move(alias)); + }); + }; + + append_aliases(alias_object.as_object().get("languageAlias"sv), locale_data.language_aliases); + append_aliases(alias_object.as_object().get("territoryAlias"sv), locale_data.territory_aliases); + append_aliases(alias_object.as_object().get("scriptAlias"sv), locale_data.script_aliases); + append_aliases(alias_object.as_object().get("variantAlias"sv), locale_data.variant_aliases); + append_aliases(alias_object.as_object().get("subdivisionAlias"sv), locale_data.subdivision_aliases); +} + static void parse_identity(String locale_path, UnicodeLocaleData& locale_data, Locale& locale) { LexicalPath languages_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them. @@ -195,11 +230,17 @@ static Core::DirIterator path_to_dir_iterator(String path) return iterator; } -static void parse_all_locales(String locale_names_path, String numbers_path, UnicodeLocaleData& locale_data) +static void parse_all_locales(String core_path, String locale_names_path, String numbers_path, UnicodeLocaleData& locale_data) { auto locale_names_iterator = path_to_dir_iterator(move(locale_names_path)); auto numbers_iterator = path_to_dir_iterator(move(numbers_path)); + LexicalPath core_supplemental_path(move(core_path)); + core_supplemental_path = core_supplemental_path.append("supplemental"sv); + VERIFY(Core::File::is_directory(core_supplemental_path.string())); + + parse_core_aliases(core_supplemental_path.string(), locale_data); + while (locale_names_iterator.has_next()) { auto locale_path = locale_names_iterator.next_full_path(); VERIFY(Core::File::is_directory(locale_path)); @@ -286,16 +327,22 @@ Optional locale_from_string(StringView const& locale); Optional get_locale_language_mapping(StringView locale, StringView language); Optional language_from_string(StringView const& language); +Optional resolve_language_alias(StringView const& language); Optional get_locale_territory_mapping(StringView locale, StringView territory); Optional territory_from_string(StringView const& territory); +Optional resolve_territory_alias(StringView const& territory); Optional get_locale_script_tag_mapping(StringView locale, StringView script_tag); Optional script_tag_from_string(StringView const& script_tag); +Optional resolve_script_tag_alias(StringView const& script_tag); Optional get_locale_currency_mapping(StringView locale, StringView currency); Optional currency_from_string(StringView const& currency); +Optional resolve_variant_alias(StringView const& variant); +Optional resolve_subdivision_alias(StringView const& subdivision); + } } @@ -460,20 +507,52 @@ Optional<@enum_title@> @enum_snake@_from_string(StringView const& @enum_snake@) )~~~"); }; + auto append_alias_search = [&](StringView enum_snake, HashMap const& aliases) { + generator.set("enum_snake", enum_snake); + + generator.append(R"~~~( +Optional resolve_@enum_snake@_alias(StringView const& @enum_snake@) +{ + static HashMap @enum_snake@_aliases { {)~~~"); + + for (auto const& alias : aliases) { + generator.set("key"sv, alias.key); + generator.set("alias"sv, alias.value); + + generator.append(R"~~~( + { "@key@"sv, "@alias@"sv },)~~~"); + } + + generator.append(R"~~~( + } }; + + if (auto alias = @enum_snake@_aliases.get(@enum_snake@); alias.has_value()) + return alias.value(); + return {}; +} +)~~~"); + }; + append_from_string("Locale"sv, "locale"sv, locale_data.locales.keys()); append_mapping_search("Language"sv, "language"sv, "s_languages"sv); append_from_string("Language"sv, "language"sv, locale_data.languages); + append_alias_search("language"sv, locale_data.language_aliases); append_mapping_search("Territory"sv, "territory"sv, "s_territories"sv); append_from_string("Territory"sv, "territory"sv, locale_data.territories); + append_alias_search("territory"sv, locale_data.territory_aliases); append_mapping_search("ScriptTag"sv, "script_tag"sv, "s_scripts"sv); append_from_string("ScriptTag"sv, "script_tag"sv, locale_data.scripts); + append_alias_search("script_tag"sv, locale_data.script_aliases); append_mapping_search("Currency"sv, "currency"sv, "s_currencies"sv); append_from_string("Currency"sv, "currency"sv, locale_data.currencies); + append_alias_search("variant"sv, locale_data.variant_aliases); + append_alias_search("subdivision"sv, locale_data.subdivision_aliases); + generator.append(R"~~~( } @@ -519,7 +598,7 @@ int main(int argc, char** argv) auto generated_implementation_file = open_file(generated_implementation_path, "-c/--generated-implementation-path", Core::OpenMode::ReadWrite); UnicodeLocaleData locale_data; - parse_all_locales(locale_names_path, numbers_path, locale_data); + parse_all_locales(core_path, locale_names_path, numbers_path, locale_data); generate_unicode_locale_header(generated_header_file, locale_data); generate_unicode_locale_implementation(generated_implementation_file, locale_data); diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp index e9f8c3a6e9..a5d5ccbfaf 100644 --- a/Userland/Libraries/LibUnicode/Locale.cpp +++ b/Userland/Libraries/LibUnicode/Locale.cpp @@ -640,4 +640,49 @@ Optional get_locale_currency_mapping([[maybe_unused]] StringView loc #endif } +Optional resolve_language_alias(StringView language) +{ +#if ENABLE_UNICODE_DATA + return Detail::resolve_language_alias(language); +#else + return language; +#endif +} + +Optional resolve_territory_alias(StringView territory) +{ +#if ENABLE_UNICODE_DATA + return Detail::resolve_territory_alias(territory); +#else + return territory; +#endif +} + +Optional resolve_script_tag_alias(StringView script_tag) +{ +#if ENABLE_UNICODE_DATA + return Detail::resolve_script_tag_alias(script_tag); +#else + return script_tag; +#endif +} + +Optional resolve_variant_alias(StringView variant) +{ +#if ENABLE_UNICODE_DATA + return Detail::resolve_variant_alias(variant); +#else + return variant; +#endif +} + +Optional resolve_subdivision_alias(StringView subdivision) +{ +#if ENABLE_UNICODE_DATA + return Detail::resolve_subdivision_alias(subdivision); +#else + return subdivision; +#endif +} + } diff --git a/Userland/Libraries/LibUnicode/Locale.h b/Userland/Libraries/LibUnicode/Locale.h index e570c0e15b..de5bcb0b8b 100644 --- a/Userland/Libraries/LibUnicode/Locale.h +++ b/Userland/Libraries/LibUnicode/Locale.h @@ -75,4 +75,10 @@ Optional get_locale_territory_mapping(StringView locale, StringView Optional get_locale_script_mapping(StringView locale, StringView script); Optional get_locale_currency_mapping(StringView locale, StringView currency); +Optional resolve_language_alias(StringView language); +Optional resolve_territory_alias(StringView territory); +Optional resolve_script_tag_alias(StringView script_tag); +Optional resolve_variant_alias(StringView variant); +Optional resolve_subdivision_alias(StringView subdivision); + }