From 0b69e9f974ff023029e89b1fca3529a1c5b62d71 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 13 Apr 2023 10:28:16 -0400 Subject: [PATCH] LibLocale: Prepare locale data generator for breaking changes in CLDR 43 In CLDR 42 and earlier, we were able to assume all cldr-localenames files existed for every locale. These files no longer exist for locales that don't provide any localized data. Namely, this is the "und" locale (which is an alias for the root locale, i.e. the locale we fall back to when a user provides an unknown locale). Further, we were previously able to assume that each currencies.json in cldr-numbers contained all currencies. This file now excludes currencies whose localized names are the same as the currency key. Therefore, we now preprocess currencies.json to discover all currencies ahead of time, much like we already do for languages.json. --- .../LibLocale/GenerateLocaleData.cpp | 106 +++++++++++++----- Tests/LibLocale/TestLocale.cpp | 72 ++++++++++++ 2 files changed, 151 insertions(+), 27 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp index 9fb8e78f03..99ffa15178 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp @@ -302,12 +302,12 @@ static ErrorOr parse_likely_subtags(DeprecatedString core_supplemental_pat static ErrorOr parse_identity(DeprecatedString locale_path, CLDR& cldr, LocaleData& locale) { - LexicalPath languages_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them. - languages_path = languages_path.append("languages.json"sv); + LexicalPath locale_display_names_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them. 
+ locale_display_names_path = locale_display_names_path.append("localeDisplayNames.json"sv); - auto languages = TRY(read_json_file(languages_path.string())); - auto const& main_object = languages.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(languages_path.parent().basename()).value(); + auto locale_display_names = TRY(read_json_file(locale_display_names_path.string())); + auto const& main_object = locale_display_names.as_object().get_object("main"sv).value(); + auto const& locale_object = main_object.get_object(locale_display_names_path.parent().basename()).value(); auto const& identity_object = locale_object.get_object("identity"sv).value(); auto const& language_string = identity_object.get_deprecated_string("language"sv).value(); auto const& territory_string = identity_object.get_deprecated_string("territory"sv); @@ -363,6 +363,9 @@ static ErrorOr preprocess_languages(DeprecatedString locale_path, CLDR& cl LexicalPath languages_path(move(locale_path)); languages_path = languages_path.append("languages.json"sv); + if (!FileSystem::exists(languages_path.string())) + return {}; + auto locale_languages = TRY(read_json_file(languages_path.string())); auto const& main_object = locale_languages.as_object().get_object("main"sv).value(); auto const& locale_object = main_object.get_object(languages_path.parent().basename()).value(); @@ -377,6 +380,25 @@ static ErrorOr preprocess_languages(DeprecatedString locale_path, CLDR& cl return {}; } +static ErrorOr preprocess_currencies(DeprecatedString numbers_path, CLDR& cldr) +{ + LexicalPath currencies_path(move(numbers_path)); + currencies_path = currencies_path.append("currencies.json"sv); + + auto locale_currencies = TRY(read_json_file(currencies_path.string())); + auto const& main_object = locale_currencies.as_object().get_object("main"sv).value(); + auto const& locale_object = main_object.get_object(currencies_path.parent().basename()).value(); + auto const& 
locale_numbers_object = locale_object.get_object("numbers"sv).value(); + auto const& currencies_object = locale_numbers_object.get_object("currencies"sv).value(); + + currencies_object.for_each_member([&](auto const& key, JsonValue const&) { + if (!cldr.currencies.contains_slow(key)) + cldr.currencies.append(key); + }); + + return {}; +} + static ErrorOr parse_unicode_extension_keywords(DeprecatedString bcp47_path, CLDR& cldr) { constexpr auto desired_keywords = Array { "ca"sv, "co"sv, "hc"sv, "kf"sv, "kn"sv, "nu"sv }; @@ -446,15 +468,23 @@ static ErrorOr parse_locale_languages(DeprecatedString locale_path, CLDR& LexicalPath languages_path(move(locale_path)); languages_path = languages_path.append("languages.json"sv); + LanguageList languages; + languages.resize(cldr.languages.size()); + + if (!FileSystem::exists(languages_path.string())) { + for (size_t i = 0; i < languages.size(); ++i) + languages[i] = cldr.unique_strings.ensure(cldr.languages[i]); + + locale.languages = cldr.unique_language_lists.ensure(move(languages)); + return {}; + } + auto locale_languages = TRY(read_json_file(languages_path.string())); auto const& main_object = locale_languages.as_object().get_object("main"sv).value(); auto const& locale_object = main_object.get_object(languages_path.parent().basename()).value(); auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); auto const& languages_object = locale_display_names_object.get_object("languages"sv).value(); - LanguageList languages; - languages.resize(cldr.languages.size()); - languages_object.for_each_member([&](auto const& key, JsonValue const& value) { if (key.contains("-alt-"sv)) return; @@ -472,15 +502,23 @@ static ErrorOr parse_locale_territories(DeprecatedString locale_path, CLDR LexicalPath territories_path(move(locale_path)); territories_path = territories_path.append("territories.json"sv); + TerritoryList territories; + territories.resize(cldr.territories.size()); + + if 
(!FileSystem::exists(territories_path.string())) { + for (size_t i = 0; i < territories.size(); ++i) + territories[i] = cldr.unique_strings.ensure(cldr.territories[i]); + + locale.territories = cldr.unique_territory_lists.ensure(move(territories)); + return {}; + } + auto locale_territories = TRY(read_json_file(territories_path.string())); auto const& main_object = locale_territories.as_object().get_object("main"sv).value(); auto const& locale_object = main_object.get_object(territories_path.parent().basename()).value(); auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); auto const& territories_object = locale_display_names_object.get_object("territories"sv).value(); - TerritoryList territories; - territories.resize(cldr.territories.size()); - territories_object.for_each_member([&](auto const& key, JsonValue const& value) { if (auto index = cldr.territories.find_first_index(key); index.has_value()) territories[*index] = cldr.unique_strings.ensure(value.as_string()); @@ -495,15 +533,23 @@ static ErrorOr parse_locale_scripts(DeprecatedString locale_path, CLDR& cl LexicalPath scripts_path(move(locale_path)); scripts_path = scripts_path.append("scripts.json"sv); + ScriptList scripts; + scripts.resize(cldr.scripts.size()); + + if (!FileSystem::exists(scripts_path.string())) { + for (size_t i = 0; i < scripts.size(); ++i) + scripts[i] = cldr.unique_strings.ensure(cldr.scripts[i]); + + locale.scripts = cldr.unique_script_lists.ensure(move(scripts)); + return {}; + } + auto locale_scripts = TRY(read_json_file(scripts_path.string())); auto const& main_object = locale_scripts.as_object().get_object("main"sv).value(); auto const& locale_object = main_object.get_object(scripts_path.parent().basename()).value(); auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); auto const& scripts_object = locale_display_names_object.get_object("scripts"sv).value(); - ScriptList scripts; - 
scripts.resize(cldr.scripts.size()); - scripts_object.for_each_member([&](auto const& key, JsonValue const& value) { if (auto index = cldr.scripts.find_first_index(key); index.has_value()) scripts[*index] = cldr.unique_strings.ensure(value.as_string()); @@ -606,11 +652,6 @@ static ErrorOr parse_locale_currencies(DeprecatedString numbers_path, CLDR auto const& locale_numbers_object = locale_object.get_object("numbers"sv).value(); auto const& currencies_object = locale_numbers_object.get_object("currencies"sv).value(); - currencies_object.for_each_member([&](auto const& key, JsonValue const&) { - if (!cldr.currencies.contains_slow(key)) - cldr.currencies.append(key); - }); - CurrencyList long_currencies {}; long_currencies.resize(cldr.currencies.size()); @@ -624,16 +665,16 @@ static ErrorOr parse_locale_currencies(DeprecatedString numbers_path, CLDR numeric_currencies.resize(cldr.currencies.size()); currencies_object.for_each_member([&](auto const& key, JsonValue const& value) { - auto const& long_name = value.as_object().get_deprecated_string("displayName"sv); - auto const& short_name = value.as_object().get_deprecated_string("symbol"sv); - auto const& narrow_name = value.as_object().get_deprecated_string("symbol-alt-narrow"sv); - auto const& numeric_name = value.as_object().get_deprecated_string("displayName-count-other"sv); + auto long_name = value.as_object().get_deprecated_string("displayName"sv).value_or(key); + auto short_name = value.as_object().get_deprecated_string("symbol"sv).value_or(key); + auto narrow_name = value.as_object().get_deprecated_string("symbol-alt-narrow"sv); + auto numeric_name = value.as_object().get_deprecated_string("displayName-count-other"sv); auto index = cldr.currencies.find_first_index(key).value(); - long_currencies[index] = cldr.unique_strings.ensure(long_name.value()); - short_currencies[index] = cldr.unique_strings.ensure(short_name.value()); - narrow_currencies[index] = narrow_name.has_value() ? 
cldr.unique_strings.ensure(narrow_name.value()) : 0; - numeric_currencies[index] = cldr.unique_strings.ensure(numeric_name.has_value() ? numeric_name.value() : long_name.value()); + long_currencies[index] = cldr.unique_strings.ensure(move(long_name)); + short_currencies[index] = cldr.unique_strings.ensure(move(short_name)); + narrow_currencies[index] = narrow_name.has_value() ? cldr.unique_strings.ensure(narrow_name.release_value()) : 0; + numeric_currencies[index] = numeric_name.has_value() ? cldr.unique_strings.ensure(numeric_name.release_value()) : long_currencies[index]; }); locale.long_currencies = cldr.unique_currency_lists.ensure(move(long_currencies)); @@ -652,6 +693,10 @@ static ErrorOr parse_locale_calendars(DeprecatedString locale_path, CLDR& auto const& main_object = locale_display_names.as_object().get_object("main"sv).value(); auto const& locale_object = main_object.get_object(locale_display_names_path.parent().basename()).value(); auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); + + if (!locale_display_names_object.has_object("types"sv)) + return {}; + auto const& types_object = locale_display_names_object.get_object("types"sv).value(); auto const& calendar_object = types_object.get_object("calendar"sv).value(); @@ -946,9 +991,16 @@ static ErrorOr parse_all_locales(DeprecatedString bcp47_path, DeprecatedSt return IterationDecision::Continue; })); + TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", numbers_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { + auto numbers_path = LexicalPath::join(directory.path().string(), entry.name).string(); + TRY(preprocess_currencies(numbers_path, cldr)); + return IterationDecision::Continue; + })); + quick_sort(cldr.languages); quick_sort(cldr.territories); quick_sort(cldr.scripts); + quick_sort(cldr.currencies); TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/bcp47", bcp47_path)), 
Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { auto bcp47_path = LexicalPath::join(directory.path().string(), entry.name).string(); diff --git a/Tests/LibLocale/TestLocale.cpp b/Tests/LibLocale/TestLocale.cpp index 7300faddac..754a819698 100644 --- a/Tests/LibLocale/TestLocale.cpp +++ b/Tests/LibLocale/TestLocale.cpp @@ -518,3 +518,75 @@ TEST_CASE(supports_locale_aliases) EXPECT(Locale::is_locale_available("zh-TW"sv)); EXPECT(Locale::is_locale_available("zh-Hant-TW"sv)); } + +TEST_CASE(locale_mappings_en) +{ + auto language = Locale::get_locale_language_mapping("en"sv, "en"sv); + EXPECT(language.has_value()); + EXPECT_EQ(*language, "English"sv); + + language = Locale::get_locale_language_mapping("en"sv, "i-defintely-don't-exist"sv); + EXPECT(!language.has_value()); + + auto territory = Locale::get_locale_territory_mapping("en"sv, "US"sv); + EXPECT(territory.has_value()); + EXPECT_EQ(*territory, "United States"sv); + + territory = Locale::get_locale_territory_mapping("en"sv, "i-defintely-don't-exist"sv); + EXPECT(!territory.has_value()); + + auto script = Locale::get_locale_script_mapping("en"sv, "Latn"sv); + EXPECT(script.has_value()); + EXPECT_EQ(*script, "Latin"sv); + + script = Locale::get_locale_script_mapping("en"sv, "i-defintely-don't-exist"sv); + EXPECT(!script.has_value()); +} + +TEST_CASE(locale_mappings_fr) +{ + auto language = Locale::get_locale_language_mapping("fr"sv, "en"sv); + EXPECT(language.has_value()); + EXPECT_EQ(*language, "anglais"sv); + + language = Locale::get_locale_language_mapping("fr"sv, "i-defintely-don't-exist"sv); + EXPECT(!language.has_value()); + + auto territory = Locale::get_locale_territory_mapping("fr"sv, "US"sv); + EXPECT(territory.has_value()); + EXPECT_EQ(*territory, "États-Unis"sv); + + territory = Locale::get_locale_territory_mapping("fr"sv, "i-defintely-don't-exist"sv); + EXPECT(!territory.has_value()); + + auto script = Locale::get_locale_script_mapping("fr"sv, "Latn"sv); + 
EXPECT(script.has_value()); + EXPECT_EQ(*script, "latin"sv); + + script = Locale::get_locale_script_mapping("fr"sv, "i-defintely-don't-exist"sv); + EXPECT(!script.has_value()); +} + +TEST_CASE(locale_mappings_root) +{ + auto language = Locale::get_locale_language_mapping("und"sv, "en"sv); + EXPECT(language.has_value()); + EXPECT_EQ(*language, "en"sv); + + language = Locale::get_locale_language_mapping("und"sv, "i-defintely-don't-exist"sv); + EXPECT(!language.has_value()); + + auto territory = Locale::get_locale_territory_mapping("und"sv, "US"sv); + EXPECT(territory.has_value()); + EXPECT_EQ(*territory, "US"sv); + + territory = Locale::get_locale_territory_mapping("und"sv, "i-defintely-don't-exist"sv); + EXPECT(!territory.has_value()); + + auto script = Locale::get_locale_script_mapping("und"sv, "Latn"sv); + EXPECT(script.has_value()); + EXPECT_EQ(*script, "Latn"sv); + + script = Locale::get_locale_script_mapping("und"sv, "i-defintely-don't-exist"sv); + EXPECT(!script.has_value()); +}