1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 12:48:10 +00:00

LibUnicode: Parse and generate locale display names for calendars

Note there's a bit of an unfortunate duplication between the calendar enum
generated by UnicodeLocale and the existing enum generated by
UnicodeDateTimeFormat. The former contains every calendar known to the
CLDR, whereas the latter contains the calendars we've actually parsed
for DateTimeFormat (currently only Gregorian). The new enum generated
here can be removed once DateTimeFormat knows about all calendars.
This commit is contained in:
Timothy Flynn 2022-01-12 16:15:41 -05:00 committed by Linus Groh
parent 1a3e6e8a7b
commit 7f162c471d
4 changed files with 59 additions and 4 deletions

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> * Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -36,6 +36,9 @@ constexpr auto s_script_list_index_type = "u8"sv;
using CurrencyListIndexType = u16; using CurrencyListIndexType = u16;
constexpr auto s_currency_list_index_type = "u16"sv; constexpr auto s_currency_list_index_type = "u16"sv;
using CalendarListIndexType = u8;
constexpr auto s_calendar_list_index_type = "u8"sv;
using KeywordListIndexType = u8; using KeywordListIndexType = u8;
constexpr auto s_keyword_list_index_type = "u8"sv; constexpr auto s_keyword_list_index_type = "u8"sv;
@ -109,6 +112,7 @@ using LanguageList = Vector<StringIndexType>;
using TerritoryList = Vector<StringIndexType>; using TerritoryList = Vector<StringIndexType>;
using ScriptList = Vector<StringIndexType>; using ScriptList = Vector<StringIndexType>;
using CurrencyList = Vector<StringIndexType>; using CurrencyList = Vector<StringIndexType>;
using CalendarList = Vector<StringIndexType>;
using KeywordList = Vector<StringIndexType>; using KeywordList = Vector<StringIndexType>;
using ListPatternList = Vector<ListPatternIndexType>; using ListPatternList = Vector<ListPatternIndexType>;
@ -123,6 +127,7 @@ struct Locale {
CurrencyListIndexType short_currencies { 0 }; CurrencyListIndexType short_currencies { 0 };
CurrencyListIndexType narrow_currencies { 0 }; CurrencyListIndexType narrow_currencies { 0 };
CurrencyListIndexType numeric_currencies { 0 }; CurrencyListIndexType numeric_currencies { 0 };
CalendarListIndexType calendars { 0 };
KeywordListIndexType keywords { 0 }; KeywordListIndexType keywords { 0 };
ListPatternListIndexType list_patterns { 0 }; ListPatternListIndexType list_patterns { 0 };
}; };
@ -138,6 +143,7 @@ struct UnicodeLocaleData {
UniqueStorage<TerritoryList, TerritoryListIndexType> unique_territory_lists; UniqueStorage<TerritoryList, TerritoryListIndexType> unique_territory_lists;
UniqueStorage<ScriptList, ScriptListIndexType> unique_script_lists; UniqueStorage<ScriptList, ScriptListIndexType> unique_script_lists;
UniqueStorage<CurrencyList, CurrencyListIndexType> unique_currency_lists; UniqueStorage<CurrencyList, CurrencyListIndexType> unique_currency_lists;
UniqueStorage<CalendarList, CalendarListIndexType> unique_calendar_lists;
UniqueStorage<KeywordList, KeywordListIndexType> unique_keyword_lists; UniqueStorage<KeywordList, KeywordListIndexType> unique_keyword_lists;
UniqueStorage<ListPatterns, ListPatternIndexType> unique_list_patterns; UniqueStorage<ListPatterns, ListPatternIndexType> unique_list_patterns;
UniqueStorage<ListPatternList, ListPatternListIndexType> unique_list_pattern_lists; UniqueStorage<ListPatternList, ListPatternListIndexType> unique_list_pattern_lists;
@ -150,6 +156,12 @@ struct UnicodeLocaleData {
Vector<String> scripts; Vector<String> scripts;
Vector<String> variants; Vector<String> variants;
Vector<String> currencies; Vector<String> currencies;
Vector<String> calendars;
Vector<Alias> calendar_aliases {
// FIXME: Aliases should come from BCP47. See: https://unicode-org.atlassian.net/browse/CLDR-15158
{ "ethiopic-amete-alem"sv, "ethioaa"sv },
{ "gregorian"sv, "gregory"sv },
};
Vector<String> keywords { "ca"sv, "nu"sv }; // FIXME: These should be parsed from BCP47. https://unicode-org.atlassian.net/browse/CLDR-15158 Vector<String> keywords { "ca"sv, "nu"sv }; // FIXME: These should be parsed from BCP47. https://unicode-org.atlassian.net/browse/CLDR-15158
Vector<String> list_pattern_types; Vector<String> list_pattern_types;
Vector<String> list_pattern_styles; Vector<String> list_pattern_styles;
@ -458,6 +470,37 @@ static ErrorOr<void> parse_locale_currencies(String numbers_path, UnicodeLocaleD
return {}; return {};
} }
// Reads "localeDisplayNames.json" from the directory given by locale_path and records the
// calendar display names it contains.
//
// Every calendar key found is appended to locale_data.calendars if it is not already there.
// A CalendarList is then filled with the unique-string handle of each display name, placed at
// the position of its key within locale_data.calendars, and the value returned by
// locale_data.unique_calendar_lists.ensure() for that list is stored in locale.calendars.
//
// Errors from opening the file or parsing its JSON are propagated to the caller via TRY.
static ErrorOr<void> parse_locale_calendars(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
{
    LexicalPath display_names_path(move(locale_path));
    display_names_path = display_names_path.append("localeDisplayNames.json"sv);

    auto display_names_file = TRY(Core::File::open(display_names_path.string(), Core::OpenMode::ReadOnly));
    auto display_names_json = TRY(JsonValue::from_string(display_names_file->read_all()));

    // Walk down to main -> <name of the directory holding the file> -> localeDisplayNames -> types -> calendar.
    auto const& main_value = display_names_json.as_object().get("main"sv);
    auto const& locale_value = main_value.as_object().get(display_names_path.parent().basename());
    auto const& display_names_value = locale_value.as_object().get("localeDisplayNames"sv);
    auto const& types_value = display_names_value.as_object().get("types"sv);
    auto const& calendar_value = types_value.as_object().get("calendar"sv);
    auto const& calendar_names = calendar_value.as_object();

    // First pass: register any calendar key not seen before. This has to finish before the
    // list below is sized, so the two passes cannot be merged.
    calendar_names.for_each_member([&](auto const& calendar_key, auto const&) {
        if (locale_data.calendars.contains_slow(calendar_key))
            return;
        locale_data.calendars.append(calendar_key);
    });

    CalendarList display_name_list;
    display_name_list.resize(locale_data.calendars.size());

    // Second pass: store each display name at the slot matching its key's position.
    calendar_names.for_each_member([&](auto const& calendar_key, auto const& display_name) {
        auto slot = locale_data.calendars.find_first_index(calendar_key).value();
        display_name_list[slot] = locale_data.unique_strings.ensure(display_name.as_string());
    });

    locale.calendars = locale_data.unique_calendar_lists.ensure(move(display_name_list));
    return {};
}
static ErrorOr<void> parse_numeric_keywords(String locale_numbers_path, UnicodeLocaleData& locale_data, KeywordList& keywords) static ErrorOr<void> parse_numeric_keywords(String locale_numbers_path, UnicodeLocaleData& locale_data, KeywordList& keywords)
{ {
static constexpr StringView key = "nu"sv; static constexpr StringView key = "nu"sv;
@ -679,6 +722,7 @@ static ErrorOr<void> parse_all_locales(String core_path, String locale_names_pat
TRY(parse_locale_languages(locale_path, locale_data, locale)); TRY(parse_locale_languages(locale_path, locale_data, locale));
TRY(parse_locale_territories(locale_path, locale_data, locale)); TRY(parse_locale_territories(locale_path, locale_data, locale));
TRY(parse_locale_scripts(locale_path, locale_data, locale)); TRY(parse_locale_scripts(locale_path, locale_data, locale));
TRY(parse_locale_calendars(locale_path, locale_data, locale));
} }
while (misc_iterator.has_next()) { while (misc_iterator.has_next()) {
@ -738,6 +782,7 @@ namespace Unicode {
generate_enum(generator, format_identifier, "Territory"sv, {}, locale_data.territories); generate_enum(generator, format_identifier, "Territory"sv, {}, locale_data.territories);
generate_enum(generator, format_identifier, "ScriptTag"sv, {}, locale_data.scripts); generate_enum(generator, format_identifier, "ScriptTag"sv, {}, locale_data.scripts);
generate_enum(generator, format_identifier, "Currency"sv, {}, locale_data.currencies); generate_enum(generator, format_identifier, "Currency"sv, {}, locale_data.currencies);
generate_enum(generator, format_identifier, "CalendarName"sv, {}, locale_data.calendars, locale_data.calendar_aliases);
generate_enum(generator, format_identifier, "Key"sv, {}, locale_data.keywords); generate_enum(generator, format_identifier, "Key"sv, {}, locale_data.keywords);
generate_enum(generator, format_identifier, "Variant"sv, {}, locale_data.variants); generate_enum(generator, format_identifier, "Variant"sv, {}, locale_data.variants);
generate_enum(generator, format_identifier, "ListPatternType"sv, {}, locale_data.list_pattern_types); generate_enum(generator, format_identifier, "ListPatternType"sv, {}, locale_data.list_pattern_types);
@ -786,6 +831,7 @@ struct Patterns {
locale_data.unique_territory_lists.generate(generator, s_string_index_type, "s_territory_lists"sv); locale_data.unique_territory_lists.generate(generator, s_string_index_type, "s_territory_lists"sv);
locale_data.unique_script_lists.generate(generator, s_string_index_type, "s_script_lists"sv); locale_data.unique_script_lists.generate(generator, s_string_index_type, "s_script_lists"sv);
locale_data.unique_currency_lists.generate(generator, s_string_index_type, "s_currency_lists"sv); locale_data.unique_currency_lists.generate(generator, s_string_index_type, "s_currency_lists"sv);
locale_data.unique_calendar_lists.generate(generator, s_string_index_type, "s_calendar_lists"sv);
locale_data.unique_keyword_lists.generate(generator, s_string_index_type, "s_keyword_lists"sv); locale_data.unique_keyword_lists.generate(generator, s_string_index_type, "s_keyword_lists"sv);
locale_data.unique_list_patterns.generate(generator, "Patterns"sv, "s_list_patterns"sv, 10); locale_data.unique_list_patterns.generate(generator, "Patterns"sv, "s_list_patterns"sv, 10);
locale_data.unique_list_pattern_lists.generate(generator, s_list_pattern_index_type, "s_list_pattern_lists"sv); locale_data.unique_list_pattern_lists.generate(generator, s_list_pattern_index_type, "s_list_pattern_lists"sv);
@ -841,6 +887,7 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_short_currencies"sv, [&](auto const& locale) { return locale.short_currencies; }); append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_short_currencies"sv, [&](auto const& locale) { return locale.short_currencies; });
append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_narrow_currencies"sv, [&](auto const& locale) { return locale.narrow_currencies; }); append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_narrow_currencies"sv, [&](auto const& locale) { return locale.narrow_currencies; });
append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_numeric_currencies"sv, [&](auto const& locale) { return locale.numeric_currencies; }); append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_numeric_currencies"sv, [&](auto const& locale) { return locale.numeric_currencies; });
append_mapping(locales, locale_data.locales, s_calendar_list_index_type, "s_calendars"sv, [&](auto const& locale) { return locale.calendars; });
append_mapping(locales, locale_data.locales, s_keyword_list_index_type, "s_keywords"sv, [&](auto const& locale) { return locale.keywords; }); append_mapping(locales, locale_data.locales, s_keyword_list_index_type, "s_keywords"sv, [&](auto const& locale) { return locale.keywords; });
append_mapping(locales, locale_data.locales, s_list_pattern_list_index_type, "s_locale_list_patterns"sv, [&](auto const& locale) { return locale.list_patterns; }); append_mapping(locales, locale_data.locales, s_list_pattern_list_index_type, "s_locale_list_patterns"sv, [&](auto const& locale) { return locale.list_patterns; });
@ -1104,6 +1151,9 @@ Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringVi
append_mapping_search("narrow_currency"sv, "currency"sv, "s_narrow_currencies"sv, "s_currency_lists"sv); append_mapping_search("narrow_currency"sv, "currency"sv, "s_narrow_currencies"sv, "s_currency_lists"sv);
append_mapping_search("numeric_currency"sv, "currency"sv, "s_numeric_currencies"sv, "s_currency_lists"sv); append_mapping_search("numeric_currency"sv, "currency"sv, "s_numeric_currencies"sv, "s_currency_lists"sv);
append_from_string("CalendarName"sv, "calendar_name"sv, locale_data.calendars, locale_data.calendar_aliases);
append_mapping_search("calendar"sv, "calendar_name"sv, "s_calendars"sv, "s_calendar_lists"sv);
append_from_string("Key"sv, "key"sv, locale_data.keywords); append_from_string("Key"sv, "key"sv, locale_data.keywords);
append_mapping_search("key"sv, "key"sv, "s_keywords"sv, "s_keyword_lists"sv); append_mapping_search("key"sv, "key"sv, "s_keywords"sv, "s_keyword_lists"sv);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> * Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -13,6 +13,7 @@ namespace Unicode {
enum class Calendar : u8; enum class Calendar : u8;
enum class CalendarFormatType : u8; enum class CalendarFormatType : u8;
enum class CalendarPatternStyle : u8; enum class CalendarPatternStyle : u8;
enum class CalendarName : u8;
enum class CalendarSymbol : u8; enum class CalendarSymbol : u8;
enum class CompactNumberFormatType : u8; enum class CompactNumberFormatType : u8;
enum class Condition : u8; enum class Condition : u8;

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> * Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -746,6 +746,7 @@ Optional<Language> __attribute__((weak)) language_from_string(StringView) { retu
Optional<Territory> __attribute__((weak)) territory_from_string(StringView) { return {}; } Optional<Territory> __attribute__((weak)) territory_from_string(StringView) { return {}; }
Optional<ScriptTag> __attribute__((weak)) script_tag_from_string(StringView) { return {}; } Optional<ScriptTag> __attribute__((weak)) script_tag_from_string(StringView) { return {}; }
Optional<Currency> __attribute__((weak)) currency_from_string(StringView) { return {}; } Optional<Currency> __attribute__((weak)) currency_from_string(StringView) { return {}; }
Optional<CalendarName> __attribute__((weak)) calendar_name_from_string(StringView) { return {}; }
Optional<Key> __attribute__((weak)) key_from_string(StringView) { return {}; } Optional<Key> __attribute__((weak)) key_from_string(StringView) { return {}; }
Optional<ListPatternType> __attribute__((weak)) list_pattern_type_from_string(StringView) { return {}; } Optional<ListPatternType> __attribute__((weak)) list_pattern_type_from_string(StringView) { return {}; }
Optional<ListPatternStyle> __attribute__((weak)) list_pattern_style_from_string(StringView) { return {}; } Optional<ListPatternStyle> __attribute__((weak)) list_pattern_style_from_string(StringView) { return {}; }
@ -756,6 +757,7 @@ Optional<StringView> __attribute__((weak)) get_locale_long_currency_mapping(Stri
Optional<StringView> __attribute__((weak)) get_locale_short_currency_mapping(StringView, StringView) { return {}; } Optional<StringView> __attribute__((weak)) get_locale_short_currency_mapping(StringView, StringView) { return {}; }
Optional<StringView> __attribute__((weak)) get_locale_narrow_currency_mapping(StringView, StringView) { return {}; } Optional<StringView> __attribute__((weak)) get_locale_narrow_currency_mapping(StringView, StringView) { return {}; }
Optional<StringView> __attribute__((weak)) get_locale_numeric_currency_mapping(StringView, StringView) { return {}; } Optional<StringView> __attribute__((weak)) get_locale_numeric_currency_mapping(StringView, StringView) { return {}; }
Optional<StringView> __attribute__((weak)) get_locale_calendar_mapping(StringView, StringView) { return {}; }
Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency, Style style) Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency, Style style)
{ {

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> * Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -146,6 +146,7 @@ Optional<Language> language_from_string(StringView language);
Optional<Territory> territory_from_string(StringView territory); Optional<Territory> territory_from_string(StringView territory);
Optional<ScriptTag> script_tag_from_string(StringView script_tag); Optional<ScriptTag> script_tag_from_string(StringView script_tag);
Optional<Currency> currency_from_string(StringView currency); Optional<Currency> currency_from_string(StringView currency);
Optional<CalendarName> calendar_name_from_string(StringView calendar);
Optional<Key> key_from_string(StringView key); Optional<Key> key_from_string(StringView key);
Optional<ListPatternType> list_pattern_type_from_string(StringView list_pattern_type); Optional<ListPatternType> list_pattern_type_from_string(StringView list_pattern_type);
Optional<ListPatternStyle> list_pattern_style_from_string(StringView list_pattern_style); Optional<ListPatternStyle> list_pattern_style_from_string(StringView list_pattern_style);
@ -158,6 +159,7 @@ Optional<StringView> get_locale_short_currency_mapping(StringView locale, String
Optional<StringView> get_locale_narrow_currency_mapping(StringView locale, StringView currency); Optional<StringView> get_locale_narrow_currency_mapping(StringView locale, StringView currency);
Optional<StringView> get_locale_numeric_currency_mapping(StringView locale, StringView currency); Optional<StringView> get_locale_numeric_currency_mapping(StringView locale, StringView currency);
Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency, Style style); Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency, Style style);
Optional<StringView> get_locale_calendar_mapping(StringView locale, StringView calendar);
Optional<StringView> get_locale_key_mapping(StringView locale, StringView keyword); Optional<StringView> get_locale_key_mapping(StringView locale, StringView keyword);
Vector<StringView> get_locale_key_mapping_list(StringView locale, StringView keyword); Vector<StringView> get_locale_key_mapping_list(StringView locale, StringView keyword);