From 7f162c471d14e6c298226f085ffb0fc8cfff8c22 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 12 Jan 2022 16:15:41 -0500 Subject: [PATCH] LibUnicode: Parse and generate locale display names for calendars Note there's a bit of an unfortunate duplication in the calendar enum generated by UnicodeLocale and the existing enum generated by UnicodeDateTimeFormat. The former contains every calendar known to the CLDR, whereas the latter contains the calendars we've actually parsed for DateTimeFormat (currently only Gregorian). The new enum generated here can be removed once DateTimeFormat knows about all calendars. --- .../LibUnicode/GenerateUnicodeLocale.cpp | 52 ++++++++++++++++++- Userland/Libraries/LibUnicode/Forward.h | 3 +- Userland/Libraries/LibUnicode/Locale.cpp | 4 +- Userland/Libraries/LibUnicode/Locale.h | 4 +- 4 files changed, 59 insertions(+), 4 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp index 23197e4a47..82e05bf6a7 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Tim Flynn + * Copyright (c) 2021-2022, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -36,6 +36,9 @@ constexpr auto s_script_list_index_type = "u8"sv; using CurrencyListIndexType = u16; constexpr auto s_currency_list_index_type = "u16"sv; +using CalendarListIndexType = u8; +constexpr auto s_calendar_list_index_type = "u8"sv; + using KeywordListIndexType = u8; constexpr auto s_keyword_list_index_type = "u8"sv; @@ -109,6 +112,7 @@ using LanguageList = Vector; using TerritoryList = Vector; using ScriptList = Vector; using CurrencyList = Vector; +using CalendarList = Vector; using KeywordList = Vector; using ListPatternList = Vector; @@ -123,6 +127,7 @@ struct Locale { CurrencyListIndexType short_currencies { 0 }; CurrencyListIndexType narrow_currencies { 0 }; CurrencyListIndexType numeric_currencies { 0 }; + CalendarListIndexType calendars { 0 }; KeywordListIndexType keywords { 0 }; ListPatternListIndexType list_patterns { 0 }; }; @@ -138,6 +143,7 @@ struct UnicodeLocaleData { UniqueStorage unique_territory_lists; UniqueStorage unique_script_lists; UniqueStorage unique_currency_lists; + UniqueStorage unique_calendar_lists; UniqueStorage unique_keyword_lists; UniqueStorage unique_list_patterns; UniqueStorage unique_list_pattern_lists; @@ -150,6 +156,12 @@ struct UnicodeLocaleData { Vector scripts; Vector variants; Vector currencies; + Vector calendars; + Vector calendar_aliases { + // FIXME: Aliases should come from BCP47. See: https://unicode-org.atlassian.net/browse/CLDR-15158 + { "ethiopic-amete-alem"sv, "ethioaa"sv }, + { "gregorian"sv, "gregory"sv }, + }; Vector keywords { "ca"sv, "nu"sv }; // FIXME: These should be parsed from BCP47. https://unicode-org.atlassian.net/browse/CLDR-15158 Vector list_pattern_types; Vector list_pattern_styles; @@ -458,6 +470,37 @@ static ErrorOr parse_locale_currencies(String numbers_path, UnicodeLocaleD return {}; } +static ErrorOr parse_locale_calendars(String locale_path, UnicodeLocaleData& locale_data, Locale& locale) +{ + LexicalPath locale_display_names_path(move(locale_path)); + locale_display_names_path = locale_display_names_path.append("localeDisplayNames.json"sv); + + auto locale_display_names_file = TRY(Core::File::open(locale_display_names_path.string(), Core::OpenMode::ReadOnly)); + auto locale_display_names = TRY(JsonValue::from_string(locale_display_names_file->read_all())); + + auto const& main_object = locale_display_names.as_object().get("main"sv); + auto const& locale_object = main_object.as_object().get(locale_display_names_path.parent().basename()); + auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv); + auto const& types_object = locale_display_names_object.as_object().get("types"sv); + auto const& calendar_object = types_object.as_object().get("calendar"sv); + + calendar_object.as_object().for_each_member([&](auto const& key, auto const&) { + if (!locale_data.calendars.contains_slow(key)) + locale_data.calendars.append(key); + }); + + CalendarList calendars; + calendars.resize(locale_data.calendars.size()); + + calendar_object.as_object().for_each_member([&](auto const& key, auto const& calendar) { + auto index = locale_data.calendars.find_first_index(key).value(); + calendars[index] = locale_data.unique_strings.ensure(calendar.as_string()); + }); + + locale.calendars = locale_data.unique_calendar_lists.ensure(move(calendars)); + return {}; +} + static ErrorOr parse_numeric_keywords(String locale_numbers_path, UnicodeLocaleData& locale_data, KeywordList& keywords) { static constexpr StringView key = "nu"sv; @@ -679,6 +722,7 @@ static ErrorOr parse_all_locales(String core_path, String locale_names_pat TRY(parse_locale_languages(locale_path, locale_data, locale)); TRY(parse_locale_territories(locale_path, locale_data, locale)); TRY(parse_locale_scripts(locale_path, locale_data, locale)); + TRY(parse_locale_calendars(locale_path, locale_data, locale)); } while (misc_iterator.has_next()) { @@ -738,6 +782,7 @@ namespace Unicode { generate_enum(generator, format_identifier, "Territory"sv, {}, locale_data.territories); generate_enum(generator, format_identifier, "ScriptTag"sv, {}, locale_data.scripts); generate_enum(generator, format_identifier, "Currency"sv, {}, locale_data.currencies); + generate_enum(generator, format_identifier, "CalendarName"sv, {}, locale_data.calendars, locale_data.calendar_aliases); generate_enum(generator, format_identifier, "Key"sv, {}, locale_data.keywords); generate_enum(generator, format_identifier, "Variant"sv, {}, locale_data.variants); generate_enum(generator, format_identifier, "ListPatternType"sv, {}, locale_data.list_pattern_types); @@ -786,6 +831,7 @@ struct Patterns { locale_data.unique_territory_lists.generate(generator, s_string_index_type, "s_territory_lists"sv); locale_data.unique_script_lists.generate(generator, s_string_index_type, "s_script_lists"sv); locale_data.unique_currency_lists.generate(generator, s_string_index_type, "s_currency_lists"sv); + locale_data.unique_calendar_lists.generate(generator, s_string_index_type, "s_calendar_lists"sv); locale_data.unique_keyword_lists.generate(generator, s_string_index_type, "s_keyword_lists"sv); locale_data.unique_list_patterns.generate(generator, "Patterns"sv, "s_list_patterns"sv, 10); locale_data.unique_list_pattern_lists.generate(generator, s_list_pattern_index_type, "s_list_pattern_lists"sv); @@ -841,6 +887,7 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~"); append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_short_currencies"sv, [&](auto const& locale) { return locale.short_currencies; }); append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_narrow_currencies"sv, [&](auto const& locale) { return locale.narrow_currencies; }); append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_numeric_currencies"sv, [&](auto const& locale) { return locale.numeric_currencies; }); + append_mapping(locales, locale_data.locales, s_calendar_list_index_type, "s_calendars"sv, [&](auto const& locale) { return locale.calendars; }); append_mapping(locales, locale_data.locales, s_keyword_list_index_type, "s_keywords"sv, [&](auto const& locale) { return locale.keywords; }); append_mapping(locales, locale_data.locales, s_list_pattern_list_index_type, "s_locale_list_patterns"sv, [&](auto const& locale) { return locale.list_patterns; }); @@ -1104,6 +1151,9 @@ Optional get_locale_@enum_snake@_mapping(StringView locale, StringVi append_mapping_search("narrow_currency"sv, "currency"sv, "s_narrow_currencies"sv, "s_currency_lists"sv); append_mapping_search("numeric_currency"sv, "currency"sv, "s_numeric_currencies"sv, "s_currency_lists"sv); + append_from_string("CalendarName"sv, "calendar_name"sv, locale_data.calendars, locale_data.calendar_aliases); + append_mapping_search("calendar"sv, "calendar_name"sv, "s_calendars"sv, "s_calendar_lists"sv); + append_from_string("Key"sv, "key"sv, locale_data.keywords); append_mapping_search("key"sv, "key"sv, "s_keywords"sv, "s_keyword_lists"sv); diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h index f1d8a0ba1c..de9640d741 100644 --- a/Userland/Libraries/LibUnicode/Forward.h +++ b/Userland/Libraries/LibUnicode/Forward.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Tim Flynn + * Copyright (c) 2021-2022, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -13,6 +13,7 @@ namespace Unicode { enum class Calendar : u8; enum class CalendarFormatType : u8; enum class CalendarPatternStyle : u8; +enum class CalendarName : u8; enum class CalendarSymbol : u8; enum class CompactNumberFormatType : u8; enum class Condition : u8; diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp index d1125d1826..9ed9420e2b 100644 --- a/Userland/Libraries/LibUnicode/Locale.cpp +++ b/Userland/Libraries/LibUnicode/Locale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Tim Flynn + * Copyright (c) 2021-2022, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -746,6 +746,7 @@ Optional __attribute__((weak)) language_from_string(StringView) { retu Optional __attribute__((weak)) territory_from_string(StringView) { return {}; } Optional __attribute__((weak)) script_tag_from_string(StringView) { return {}; } Optional __attribute__((weak)) currency_from_string(StringView) { return {}; } +Optional __attribute__((weak)) calendar_name_from_string(StringView) { return {}; } Optional __attribute__((weak)) key_from_string(StringView) { return {}; } Optional __attribute__((weak)) list_pattern_type_from_string(StringView) { return {}; } Optional __attribute__((weak)) list_pattern_style_from_string(StringView) { return {}; } @@ -756,6 +757,7 @@ Optional __attribute__((weak)) get_locale_long_currency_mapping(Stri Optional __attribute__((weak)) get_locale_short_currency_mapping(StringView, StringView) { return {}; } Optional __attribute__((weak)) get_locale_narrow_currency_mapping(StringView, StringView) { return {}; } Optional __attribute__((weak)) get_locale_numeric_currency_mapping(StringView, StringView) { return {}; } +Optional __attribute__((weak)) get_locale_calendar_mapping(StringView, StringView) { return {}; } Optional get_locale_currency_mapping(StringView locale, StringView currency, Style style) { diff --git a/Userland/Libraries/LibUnicode/Locale.h b/Userland/Libraries/LibUnicode/Locale.h index 2080fd1d5c..d410cf475e 100644 --- a/Userland/Libraries/LibUnicode/Locale.h +++ b/Userland/Libraries/LibUnicode/Locale.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Tim Flynn + * Copyright (c) 2021-2022, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -146,6 +146,7 @@ Optional language_from_string(StringView language); Optional territory_from_string(StringView territory); Optional script_tag_from_string(StringView script_tag); Optional currency_from_string(StringView currency); +Optional calendar_name_from_string(StringView calendar); Optional key_from_string(StringView key); Optional list_pattern_type_from_string(StringView list_pattern_type); Optional list_pattern_style_from_string(StringView list_pattern_style); @@ -158,6 +159,7 @@ Optional get_locale_short_currency_mapping(StringView locale, String Optional get_locale_narrow_currency_mapping(StringView locale, StringView currency); Optional get_locale_numeric_currency_mapping(StringView locale, StringView currency); Optional get_locale_currency_mapping(StringView locale, StringView currency, Style style); +Optional get_locale_calendar_mapping(StringView locale, StringView calendar); Optional get_locale_key_mapping(StringView locale, StringView keyword); Vector get_locale_key_mapping_list(StringView locale, StringView keyword);