mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 15:27:35 +00:00
LibUnicode: Do not assume time zones & meta zones have a 1-to-1 mapping
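The generator parses metaZones.json to form a mapping of meta zones to time zones (AKA "golden zone" in TR-35). The parser erroneously assumed this was a 1-to-1 mapping; in fact, a single meta zone may map to several time zones, so each meta zone now stores a list of its time zones.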
This commit is contained in:
parent
62d8d1fdfd
commit
6d7d9dd324
4 changed files with 83 additions and 10 deletions
|
@ -519,7 +519,7 @@ struct UnicodeLocaleData {
|
||||||
HashMap<String, HourCycleListIndexType> hour_cycles;
|
HashMap<String, HourCycleListIndexType> hour_cycles;
|
||||||
Vector<String> hour_cycle_regions;
|
Vector<String> hour_cycle_regions;
|
||||||
|
|
||||||
HashMap<String, String> meta_zones;
|
HashMap<String, Vector<String>> meta_zones;
|
||||||
Vector<String> time_zones { "UTC"sv };
|
Vector<String> time_zones { "UTC"sv };
|
||||||
|
|
||||||
Vector<String> calendars;
|
Vector<String> calendars;
|
||||||
|
@ -618,11 +618,12 @@ static ErrorOr<void> parse_meta_zones(String core_path, UnicodeLocaleData& local
|
||||||
auto const& meta_zone = mapping.as_object().get("_other"sv);
|
auto const& meta_zone = mapping.as_object().get("_other"sv);
|
||||||
auto const& golden_zone = mapping.as_object().get("_type"sv);
|
auto const& golden_zone = mapping.as_object().get("_type"sv);
|
||||||
|
|
||||||
locale_data.meta_zones.set(meta_zone.as_string(), golden_zone.as_string());
|
auto& golden_zones = locale_data.meta_zones.ensure(meta_zone.as_string());
|
||||||
|
golden_zones.append(golden_zone.as_string());
|
||||||
});
|
});
|
||||||
|
|
||||||
// UTC does not appear in metaZones.json. Define it for convenience so other parsers don't need to check for its existence.
|
// UTC does not appear in metaZones.json. Define it for convenience so other parsers don't need to check for its existence.
|
||||||
locale_data.meta_zones.set("UTC"sv, "UTC"sv);
|
locale_data.meta_zones.set("UTC"sv, { "UTC"sv });
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
};
|
};
|
||||||
|
@ -1402,7 +1403,7 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
|
||||||
time_zone_formats.gmt_zero_format = locale_data.unique_strings.ensure(gmt_zero_format_string.as_string());
|
time_zone_formats.gmt_zero_format = locale_data.unique_strings.ensure(gmt_zero_format_string.as_string());
|
||||||
|
|
||||||
auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) {
|
auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) {
|
||||||
auto const& golden_zone = locale_data.meta_zones.find(meta_zone)->value;
|
auto const& golden_zones = locale_data.meta_zones.find(meta_zone)->value;
|
||||||
TimeZone time_zone {};
|
TimeZone time_zone {};
|
||||||
|
|
||||||
if (auto long_name = parse_name("long"sv, meta_zone_object); long_name.has_value())
|
if (auto long_name = parse_name("long"sv, meta_zone_object); long_name.has_value())
|
||||||
|
@ -1410,15 +1411,19 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
|
||||||
if (auto short_name = parse_name("short"sv, meta_zone_object); short_name.has_value())
|
if (auto short_name = parse_name("short"sv, meta_zone_object); short_name.has_value())
|
||||||
time_zone.short_name = short_name.value();
|
time_zone.short_name = short_name.value();
|
||||||
|
|
||||||
|
for (auto const& golden_zone : golden_zones) {
|
||||||
auto time_zone_index = locale_data.time_zones.find_first_index(golden_zone).value();
|
auto time_zone_index = locale_data.time_zones.find_first_index(golden_zone).value();
|
||||||
time_zones[time_zone_index] = locale_data.unique_time_zones.ensure(move(time_zone));
|
time_zones[time_zone_index] = locale_data.unique_time_zones.ensure(move(time_zone));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
meta_zone_object.as_object().for_each_member([&](auto const& meta_zone, JsonValue const&) {
|
meta_zone_object.as_object().for_each_member([&](auto const& meta_zone, JsonValue const&) {
|
||||||
auto const& golden_zone = locale_data.meta_zones.find(meta_zone)->value;
|
auto const& golden_zones = locale_data.meta_zones.find(meta_zone)->value;
|
||||||
|
|
||||||
|
for (auto const& golden_zone : golden_zones) {
|
||||||
if (!locale_data.time_zones.contains_slow(golden_zone))
|
if (!locale_data.time_zones.contains_slow(golden_zone))
|
||||||
locale_data.time_zones.append(golden_zone);
|
locale_data.time_zones.append(golden_zone);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
time_zones.resize(locale_data.time_zones.size());
|
time_zones.resize(locale_data.time_zones.size());
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
set(TEST_SOURCES
|
set(TEST_SOURCES
|
||||||
TestUnicodeCharacterTypes.cpp
|
TestUnicodeCharacterTypes.cpp
|
||||||
|
TestUnicodeDateTimeFormat.cpp
|
||||||
TestUnicodeLocale.cpp
|
TestUnicodeLocale.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
67
Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp
Normal file
67
Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Tim Flynn <trflynn89@pm.me>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <LibTest/TestCase.h>
|
||||||
|
|
||||||
|
#include <AK/Array.h>
|
||||||
|
#include <AK/StringView.h>
|
||||||
|
#include <LibUnicode/DateTimeFormat.h>
|
||||||
|
|
||||||
|
TEST_CASE(time_zone_name)
|
||||||
|
{
|
||||||
|
struct TestData {
|
||||||
|
StringView locale;
|
||||||
|
Unicode::CalendarPatternStyle style;
|
||||||
|
StringView time_zone;
|
||||||
|
StringView expected_result;
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr auto test_data = Array {
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "Coordinated Universal Time"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "GMT"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "GMT"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "GMT"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "GMT"sv },
|
||||||
|
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "التوقيت العالمي المنسق"sv },
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv },
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "غرينتش"sv },
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "غرينتش"sv },
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "غرينتش"sv },
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "غرينتش"sv },
|
||||||
|
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "America/Los_Angeles"sv, "Pacific Daylight Time"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "America/Los_Angeles"sv, "PDT"sv },
|
||||||
|
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "America/Los_Angeles"sv, "توقيت المحيط الهادي الصيفي"sv },
|
||||||
|
// The "ar" locale does not have a short name for PDT. LibUnicode will need to fall back to GMT offset when we have that data.
|
||||||
|
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "America/Vancouver"sv, "Pacific Daylight Time"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "America/Vancouver"sv, "PDT"sv },
|
||||||
|
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "America/Vancouver"sv, "توقيت المحيط الهادي الصيفي"sv },
|
||||||
|
// The "ar" locale does not have a short name for PDT. LibUnicode will need to fall back to GMT offset when we have that data.
|
||||||
|
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "Europe/London"sv, "Greenwich Mean Time"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "Europe/London"sv, "GMT"sv },
|
||||||
|
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "Europe/London"sv, "توقيت غرينتش"sv },
|
||||||
|
// The "ar" locale does not have a short name for GMT. LibUnicode will need to fall back to GMT offset when we have that data.
|
||||||
|
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "Africa/Accra"sv, "Greenwich Mean Time"sv },
|
||||||
|
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "Africa/Accra"sv, "GMT"sv },
|
||||||
|
|
||||||
|
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "Africa/Accra"sv, "توقيت غرينتش"sv },
|
||||||
|
// The "ar" locale does not have a short name for GMT. LibUnicode will need to fall back to GMT offset when we have that data.
|
||||||
|
};
|
||||||
|
|
||||||
|
for (auto const& test : test_data) {
|
||||||
|
auto time_zone = Unicode::get_time_zone_name(test.locale, test.time_zone, test.style);
|
||||||
|
VERIFY(time_zone.has_value());
|
||||||
|
EXPECT_EQ(*time_zone, test.expected_result);
|
||||||
|
}
|
||||||
|
}
|
|
@ -35,7 +35,7 @@ enum class ScriptTag : u8;
|
||||||
enum class StandardNumberFormatType : u8;
|
enum class StandardNumberFormatType : u8;
|
||||||
enum class Style : u8;
|
enum class Style : u8;
|
||||||
enum class Territory : u8;
|
enum class Territory : u8;
|
||||||
enum class TimeZone : u8;
|
enum class TimeZone : u16;
|
||||||
enum class Weekday : u8;
|
enum class Weekday : u8;
|
||||||
enum class WordBreakProperty : u8;
|
enum class WordBreakProperty : u8;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue