From b76e44f66f2e8bbb72ff42ea26fb751e7087e74e Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Mon, 6 Dec 2021 15:46:49 -0500 Subject: [PATCH] LibUnicode: Parse and generate time zone names in long and short form --- .../GenerateUnicodeDateTimeFormat.cpp | 198 +++++++++++++++++- .../CodeGenerators/LibUnicode/GeneratorUtil.h | 2 +- .../Libraries/LibUnicode/DateTimeFormat.cpp | 9 + .../Libraries/LibUnicode/DateTimeFormat.h | 1 + 4 files changed, 208 insertions(+), 2 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp index f3ffb509f9..86db26b7b2 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp @@ -144,8 +144,15 @@ struct Calendar { HashMap symbols {}; }; +struct TimeZone { + StringIndexType time_zone { 0 }; + StringIndexType long_name { 0 }; + StringIndexType short_name { 0 }; +}; + struct Locale { HashMap calendars; + HashMap time_zones; }; struct UnicodeLocaleData { @@ -156,6 +163,9 @@ struct UnicodeLocaleData { HashMap> hour_cycles; Vector hour_cycle_regions; + HashMap meta_zones; + Vector time_zones; + Vector calendars; Vector calendar_aliases { // FIXME: Aliases should come from BCP47. See: https://unicode-org.atlassian.net/browse/CLDR-15158 @@ -209,6 +219,35 @@ static ErrorOr parse_hour_cycles(String core_path, UnicodeLocaleData& loca return {}; }; +static ErrorOr parse_meta_zones(String core_path, UnicodeLocaleData& locale_data) +{ + // https://unicode.org/reports/tr35/tr35-dates.html#Metazones + LexicalPath meta_zone_path(move(core_path)); + meta_zone_path = meta_zone_path.append("supplemental"sv); + meta_zone_path = meta_zone_path.append("metaZones.json"sv); + + auto meta_zone_file = TRY(Core::File::open(meta_zone_path.string(), Core::OpenMode::ReadOnly)); + auto meta_zone = TRY(JsonValue::from_string(meta_zone_file->read_all())); + + auto const& supplemental_object = meta_zone.as_object().get("supplemental"sv); + auto const& meta_zone_object = supplemental_object.as_object().get("metaZones"sv); + auto const& meta_zone_array = meta_zone_object.as_object().get("metazones"sv); + + meta_zone_array.as_array().for_each([&](JsonValue const& value) { + auto const& mapping = value.as_object().get("mapZone"sv); + auto const& meta_zone = mapping.as_object().get("_other"sv); + auto const& golden_zone = mapping.as_object().get("_type"sv); + + auto golden_zone_index = locale_data.unique_strings.ensure(golden_zone.as_string()); + locale_data.meta_zones.set(meta_zone.as_string(), golden_zone_index); + }); + + // UTC does not appear in metaZones.json. Define it for convenience so other parsers don't need to check for its existence. + locale_data.meta_zones.set("UTC"sv, locale_data.unique_strings.ensure("UTC"sv)); + + return {}; +}; + static constexpr auto is_char(char ch) { return [ch](auto c) { return c == ch; }; @@ -766,9 +805,74 @@ static ErrorOr parse_calendars(String locale_calendars_path, UnicodeLocale return {}; } +static ErrorOr parse_time_zone_names(String locale_time_zone_names_path, UnicodeLocaleData& locale_data, Locale& locale) +{ + LexicalPath time_zone_names_path(move(locale_time_zone_names_path)); + time_zone_names_path = time_zone_names_path.append("timeZoneNames.json"sv); + + auto time_zone_names_file = TRY(Core::File::open(time_zone_names_path.string(), Core::OpenMode::ReadOnly)); + auto time_zone_names = TRY(JsonValue::from_string(time_zone_names_file->read_all())); + + auto const& main_object = time_zone_names.as_object().get("main"sv); + auto const& locale_object = main_object.as_object().get(time_zone_names_path.parent().basename()); + auto const& dates_object = locale_object.as_object().get("dates"sv); + auto const& time_zone_names_object = dates_object.as_object().get("timeZoneNames"sv); + auto const& meta_zone_object = time_zone_names_object.as_object().get("metazone"sv); + + if (meta_zone_object.is_null()) + return {}; + + auto parse_name = [&](StringView type, JsonObject const& meta_zone_object) -> Optional { + auto const& names = meta_zone_object.get(type); + if (!names.is_object()) + return {}; + + auto const& daylight = names.as_object().get("daylight"sv); + if (daylight.is_string()) + return locale_data.unique_strings.ensure(daylight.as_string()); + + auto const& standard = names.as_object().get("standard"sv); + if (standard.is_string()) + return locale_data.unique_strings.ensure(standard.as_string()); + + return {}; + }; + + auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) { + auto golden_zone = locale_data.meta_zones.get(meta_zone).value(); + TimeZone time_zone { .time_zone = golden_zone }; + + if (auto long_name = parse_name("long"sv, meta_zone_object); long_name.has_value()) + time_zone.long_name = long_name.value(); + if (auto short_name = parse_name("short"sv, meta_zone_object); short_name.has_value()) + time_zone.short_name = short_name.value(); + + auto const& time_zone_name = locale_data.unique_strings.get(golden_zone); + + if (!locale_data.time_zones.contains_slow(time_zone_name)) + locale_data.time_zones.append(time_zone_name); + + locale.time_zones.set(time_zone_name, move(time_zone)); + }; + + meta_zone_object.as_object().for_each_member([&](auto const& meta_zone, JsonValue const& value) { + parse_time_zone(meta_zone, value.as_object()); + }); + + // The long and short names for UTC are not under the "timeZoneNames/metazone" object, but are under "timeZoneNames/zone/Etc". + auto const& zone_object = time_zone_names_object.as_object().get("zone"sv); + auto const& etc_object = zone_object.as_object().get("Etc"sv); + auto const& utc_object = etc_object.as_object().get("UTC"sv); + parse_time_zone("UTC"sv, utc_object.as_object()); + + return {}; +} + static ErrorOr parse_all_locales(String core_path, String dates_path, UnicodeLocaleData& locale_data) { - TRY(parse_hour_cycles(move(core_path), locale_data)); + TRY(parse_hour_cycles(core_path, locale_data)); + TRY(parse_meta_zones(move(core_path), locale_data)); + auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); auto remove_variants_from_path = [&](String path) -> ErrorOr { @@ -795,6 +899,8 @@ static ErrorOr parse_all_locales(String core_path, String dates_path, Unic auto calendars_path = TRY(next_path_from_dir_iterator(calendars_iterator)); TRY(parse_calendars(move(calendars_path), locale_data, locale)); } + + TRY(parse_time_zone_names(move(dates_path), locale_data, locale)); } return {}; @@ -803,6 +909,7 @@ static ErrorOr parse_all_locales(String core_path, String dates_path, Unic static String format_identifier(StringView owner, String identifier) { identifier = identifier.replace("-"sv, "_"sv, true); + identifier = identifier.replace("/"sv, "_"sv, true); if (all_of(identifier, is_ascii_digit)) return String::formatted("{}_{}", owner[0], identifier); @@ -829,6 +936,7 @@ namespace Unicode { generate_enum(generator, format_identifier, "Calendar"sv, {}, locale_data.calendars, locale_data.calendar_aliases); generate_enum(generator, format_identifier, "HourCycleRegion"sv, {}, locale_data.hour_cycle_regions); generate_enum(generator, format_identifier, "CalendarSymbol"sv, {}, locale_data.symbols); + generate_enum(generator, format_identifier, "TimeZone"sv, {}, locale_data.time_zones); generator.append(R"~~~( namespace Detail { @@ -848,6 +956,9 @@ Optional get_calendar_month_symbol(StringView locale, StringView cal Optional get_calendar_weekday_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::Weekday value); Optional get_calendar_day_period_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::DayPeriod value); +Optional time_zone_from_string(StringView time_zone); +Optional get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style); + } } @@ -959,6 +1070,12 @@ struct CalendarData { Span<@calendar_pattern_index_type@ const> available_formats {}; Array, @calendar_symbols_size@> symbols {}; }; + +struct TimeZoneData { + @string_index_type@ time_zone { 0 }; + @string_index_type@ long_name { 0 }; + @string_index_type@ short_name { 0 }; +}; )~~~"); auto append_calendar_format = [&](auto const& calendar_format) { @@ -1079,6 +1196,42 @@ static constexpr Array @name@ { {)~~~"); )~~~"); }; + auto append_time_zones = [&](String name, auto const& time_zones) { + generator.set("name", name); + generator.set("size", String::number(locale_data.time_zones.size())); + + generator.append(R"~~~( +static constexpr Array @name@ { {)~~~"); + + constexpr size_t max_values_per_row = 20; + size_t values_in_current_row = 0; + + for (auto const& time_zone_key : locale_data.time_zones) { + auto time_zone = time_zones.find(time_zone_key); + + if (values_in_current_row++ > 0) + generator.append(" "); + + if (time_zone == time_zones.end()) { + generator.append("{},"); + } else { + generator.set("time_zone", String::number(time_zone->value.time_zone)); + generator.set("long_name", String::number(time_zone->value.long_name)); + generator.set("short_name", String::number(time_zone->value.short_name)); + generator.append("{ @time_zone@, @long_name@, @short_name@ },"); + } + + if (values_in_current_row == max_values_per_row) { + values_in_current_row = 0; + generator.append("\n "); + } + } + + generator.append(R"~~~( +} }; +)~~~"); + }; + auto append_hour_cycles = [&](String name, auto const& hour_cycle_region) { auto const& hour_cycles = locale_data.hour_cycles.find(hour_cycle_region)->value; @@ -1097,6 +1250,7 @@ static constexpr Array @name@ { { )~~~"); }; generate_mapping(generator, locale_data.locales, "CalendarData"sv, "s_calendars"sv, "s_calendars_{}", [&](auto const& name, auto const& value) { append_calendars(name, value.calendars); }); + generate_mapping(generator, locale_data.locales, "TimeZoneData"sv, "s_time_zones"sv, "s_time_zones_{}", [&](auto const& name, auto const& value) { append_time_zones(name, value.time_zones); }); generate_mapping(generator, locale_data.hour_cycle_regions, "u8"sv, "s_hour_cycles"sv, "s_hour_cycles_{}", [&](auto const& name, auto const& value) { append_hour_cycles(name, value); }); auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector const& aliases = {}) { @@ -1113,6 +1267,7 @@ static constexpr Array @name@ { { )~~~"); append_from_string("Calendar"sv, "calendar"sv, locale_data.calendars, locale_data.calendar_aliases); append_from_string("HourCycleRegion"sv, "hour_cycle_region"sv, locale_data.hour_cycle_regions); + append_from_string("TimeZone"sv, "time_zone"sv, locale_data.time_zones); generator.append(R"~~~( Vector get_regional_hour_cycles(StringView region) @@ -1240,6 +1395,47 @@ Optional get_calendar_day_period_symbol(StringView locale, StringVie return {}; } +static TimeZoneData const* find_time_zone_data(StringView locale, StringView time_zone) +{ + auto locale_value = locale_from_string(locale); + if (!locale_value.has_value()) + return nullptr; + + auto time_zone_value = time_zone_from_string(time_zone); + if (!time_zone_value.has_value()) + return nullptr; + + auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. + auto time_zone_index = to_underlying(*time_zone_value); + + auto const& time_zones = s_time_zones.at(locale_index); + return &time_zones[time_zone_index]; +} + +Optional get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style) +{ + if (auto const* data = find_time_zone_data(locale, time_zone); data != nullptr) { + @string_index_type@ time_zone_index = 0; + + switch (style) { + case CalendarPatternStyle::Long: + time_zone_index = data->long_name; + break; + case CalendarPatternStyle::Short: + time_zone_index = data->short_name; + break; + + default: + VERIFY_NOT_REACHED(); + } + + if (time_zone_index != 0) + return s_string_list[time_zone_index]; + } + + return {}; +} + } )~~~"); diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h index 167751bb72..ec4fe94a90 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h @@ -302,7 +302,7 @@ template void generate_mapping(SourceGenerator& generator, LocalesType const& locales, StringView type, StringView name, StringView format, ListFormatter&& format_list) { auto format_mapping_name = [](StringView format, StringView name) { - auto mapping_name = name.to_lowercase_string().replace("-"sv, "_"sv, true); + auto mapping_name = name.to_lowercase_string().replace("-"sv, "_"sv, true).replace("/"sv, "_"sv, true); return String::formatted(format, mapping_name); }; diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.cpp b/Userland/Libraries/LibUnicode/DateTimeFormat.cpp index 96e1fa71aa..807fdac074 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.cpp +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.cpp @@ -176,4 +176,13 @@ Optional get_calendar_day_period_symbol([[maybe_unused]] StringView #endif } +Optional get_time_zone_name([[maybe_unused]] StringView locale, [[maybe_unused]] StringView time_zone, [[maybe_unused]] CalendarPatternStyle style) +{ +#if ENABLE_UNICODE_DATA + return Detail::get_time_zone_name(locale, time_zone, style); +#else + return {}; +#endif +} + } diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.h b/Userland/Libraries/LibUnicode/DateTimeFormat.h index 9ea3abcaff..e22a5ad63c 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.h +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.h @@ -142,5 +142,6 @@ Optional get_calendar_era_symbol(StringView locale, StringView calen Optional get_calendar_month_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::Month value); Optional get_calendar_weekday_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::Weekday value); Optional get_calendar_day_period_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::DayPeriod value); +Optional get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style); }