From c86f7a675dc6b747a0f4189f89505749b9a1e130 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 13 Jan 2022 14:02:12 -0500 Subject: [PATCH] LibUnicode: Do not limit language display names to known locales Currently, the UnicodeLocale generator collects a list of known locales from the CLDR before processing language display names. For each locale, the identifier is broken into language, script, and region subtags, and we create a list of seen languages. When processing display names, we skip languages we hadn't seen in that first step. This is insufficient for language display names like "en-GB", which do not have a locale entry in the CLDR, and thus are skipped. So instead, create the list of known languages by actually reading through the list of languages which have a display name. --- .../LibUnicode/GenerateUnicodeLocale.cpp | 40 ++++++++++++++++--- Userland/Libraries/LibUnicode/Forward.h | 2 +- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp index f40df36282..555ef475c3 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp @@ -21,8 +21,8 @@ #include #include -using StringIndexType = u16; -constexpr auto s_string_index_type = "u16"sv; +using StringIndexType = u32; +constexpr auto s_string_index_type = "u32"sv; using DisplayPatternIndexType = u8; constexpr auto s_display_pattern_index_type = "u8"sv; @@ -319,8 +319,6 @@ static ErrorOr parse_identity(String locale_path, UnicodeLocaleData& local auto const& variant_string = identity_object.as_object().get("variant"sv); locale.language = language_string.as_string(); - if (!locale_data.languages.contains_slow(locale.language)) - locale_data.languages.append(locale.language); if (territory_string.is_string()) { locale.territory = 
territory_string.as_string(); @@ -366,6 +364,27 @@ static ErrorOr parse_locale_display_patterns(String locale_path, UnicodeLo return {}; } +static ErrorOr preprocess_languages(String locale_path, UnicodeLocaleData& locale_data) +{ + LexicalPath languages_path(move(locale_path)); + languages_path = languages_path.append("languages.json"sv); + + auto languages_file = TRY(Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly)); + auto locale_languages = TRY(JsonValue::from_string(languages_file->read_all())); + + auto const& main_object = locale_languages.as_object().get("main"sv); + auto const& locale_object = main_object.as_object().get(languages_path.parent().basename()); + auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv); + auto const& languages_object = locale_display_names_object.as_object().get("languages"sv); + + languages_object.as_object().for_each_member([&](auto const& key, auto const&) { + if (!key.contains("-alt-"sv) && !locale_data.languages.contains_slow(key)) + locale_data.languages.append(key); + }); + + return {}; +} + static ErrorOr parse_locale_languages(String locale_path, UnicodeLocaleData& locale_data, Locale& locale) { LexicalPath languages_path(move(locale_path)); @@ -383,8 +402,11 @@ static ErrorOr parse_locale_languages(String locale_path, UnicodeLocaleDat languages.resize(locale_data.languages.size()); languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { - if (auto index = locale_data.languages.find_first_index(key); index.has_value()) - languages[*index] = locale_data.unique_strings.ensure(value.as_string()); + if (key.contains("-alt-"sv)) + return; + + auto index = locale_data.languages.find_first_index(key).value(); + languages[index] = locale_data.unique_strings.ensure(value.as_string()); }); locale.languages = locale_data.unique_language_lists.ensure(move(languages)); @@ -802,6 +824,7 @@ static ErrorOr 
define_aliases_without_scripts(UnicodeLocaleData& locale_da static ErrorOr parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, String dates_path, UnicodeLocaleData& locale_data) { auto identity_iterator = TRY(path_to_dir_iterator(locale_names_path)); + auto preprocess_iterator = TRY(path_to_dir_iterator(locale_names_path)); auto locale_names_iterator = TRY(path_to_dir_iterator(move(locale_names_path))); auto misc_iterator = TRY(path_to_dir_iterator(move(misc_path))); auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path))); @@ -835,6 +858,11 @@ static ErrorOr parse_all_locales(String core_path, String locale_names_pat TRY(parse_identity(locale_path, locale_data, locale)); } + while (preprocess_iterator.has_next()) { + auto locale_path = TRY(next_path_from_dir_iterator(preprocess_iterator)); + TRY(preprocess_languages(locale_path, locale_data)); + } + quick_sort(locale_data.languages); quick_sort(locale_data.territories); quick_sort(locale_data.scripts); diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h index 0c2ea6ee01..6080fdd42a 100644 --- a/Userland/Libraries/LibUnicode/Forward.h +++ b/Userland/Libraries/LibUnicode/Forward.h @@ -25,7 +25,7 @@ enum class GeneralCategory : u8; enum class HourCycle : u8; enum class HourCycleRegion : u8; enum class Key : u8; -enum class Language : u8; +enum class Language : u16; enum class ListPatternStyle : u8; enum class ListPatternType : u8; enum class Locale : u16;