mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 20:32:44 +00:00 
			
		
		
		
	LibUnicode: Parse and generate relative-time format patterns
Relative-time format patterns are of one of two forms:
    * Tensed - refer to the past or the future, e.g. "N years ago" or
      "in N years".
    * Numbered - refer to a specific numeric value, e.g. "in 1 year"
      becomes "next year" and "in 0 years" becomes "this year".
In ECMA-402, tensed and numbered patterns correspond to the values "always"
and "auto" of the "numeric" formatting option, respectively.
			
			
This commit is contained in:
		
							parent
							
								
									27eda77c97
								
							
						
					
					
						commit
						789f093b2e
					
				
					 4 changed files with 295 additions and 1 deletions
				
			
		|  | @ -17,18 +17,133 @@ | ||||||
| #include <LibCore/ArgsParser.h> | #include <LibCore/ArgsParser.h> | ||||||
| #include <LibCore/DirIterator.h> | #include <LibCore/DirIterator.h> | ||||||
| #include <LibCore/File.h> | #include <LibCore/File.h> | ||||||
|  | #include <LibUnicode/Locale.h> | ||||||
|  | #include <LibUnicode/RelativeTimeFormat.h> | ||||||
| 
 | 
 | ||||||
| using StringIndexType = u16; | using StringIndexType = u16; | ||||||
| constexpr auto s_string_index_type = "u16"sv; | constexpr auto s_string_index_type = "u16"sv; | ||||||
| 
 | 
 | ||||||
|  | using RelativeTimeFormatIndexType = u16; | ||||||
|  | constexpr auto s_relative_time_format_index_type = "u16"sv; | ||||||
|  | 
 | ||||||
|  | struct RelativeTimeFormat { | ||||||
|  |     unsigned hash() const | ||||||
|  |     { | ||||||
|  |         auto hash = time_unit.hash(); | ||||||
|  |         hash = pair_int_hash(hash, style.hash()); | ||||||
|  |         hash = pair_int_hash(hash, plurality.hash()); | ||||||
|  |         hash = pair_int_hash(hash, tense_or_number); | ||||||
|  |         hash = pair_int_hash(hash, pattern); | ||||||
|  |         return hash; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool operator==(RelativeTimeFormat const& other) const | ||||||
|  |     { | ||||||
|  |         return (time_unit == other.time_unit) | ||||||
|  |             && (plurality == other.plurality) | ||||||
|  |             && (style == other.style) | ||||||
|  |             && (tense_or_number == other.tense_or_number) | ||||||
|  |             && (pattern == other.pattern); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     String time_unit; | ||||||
|  |     String style; | ||||||
|  |     String plurality; | ||||||
|  |     StringIndexType tense_or_number { 0 }; | ||||||
|  |     StringIndexType pattern { 0 }; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> { | ||||||
|  |     ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format) | ||||||
|  |     { | ||||||
|  |         return Formatter<FormatString>::format(builder, | ||||||
|  |             "{{ TimeUnit::{}, Style::{}, RelativeTimeFormat::Plurality::{}, {}, {} }}", | ||||||
|  |             format.time_unit, | ||||||
|  |             format.style, | ||||||
|  |             format.plurality, | ||||||
|  |             format.tense_or_number, | ||||||
|  |             format.pattern); | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> { | ||||||
|  |     static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct Locale { | struct Locale { | ||||||
|  |     Vector<RelativeTimeFormatIndexType> time_units; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct UnicodeLocaleData { | struct UnicodeLocaleData { | ||||||
|     UniqueStringStorage<StringIndexType> unique_strings; |     UniqueStringStorage<StringIndexType> unique_strings; | ||||||
|  |     UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats; | ||||||
|  | 
 | ||||||
|     HashMap<String, Locale> locales; |     HashMap<String, Locale> locales; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale) | ||||||
|  | { | ||||||
|  |     LexicalPath date_fields_path(move(locale_dates_path)); | ||||||
|  |     date_fields_path = date_fields_path.append("dateFields.json"sv); | ||||||
|  | 
 | ||||||
|  |     auto date_fields_file = TRY(Core::File::open(date_fields_path.string(), Core::OpenMode::ReadOnly)); | ||||||
|  |     auto date_fields = TRY(JsonValue::from_string(date_fields_file->read_all())); | ||||||
|  | 
 | ||||||
|  |     auto const& main_object = date_fields.as_object().get("main"sv); | ||||||
|  |     auto const& locale_object = main_object.as_object().get(date_fields_path.parent().basename()); | ||||||
|  |     auto const& dates_object = locale_object.as_object().get("dates"sv); | ||||||
|  |     auto const& fields_object = dates_object.as_object().get("fields"sv); | ||||||
|  | 
 | ||||||
|  |     auto is_sanctioned_unit = [](auto unit) { | ||||||
|  |         // This is a copy of the time units sanctioned for use within ECMA-402.
 | ||||||
|  |         // https://tc39.es/ecma402/#sec-singularrelativetimeunit
 | ||||||
|  |         return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv); | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) { | ||||||
|  |         RelativeTimeFormat format {}; | ||||||
|  |         format.time_unit = unit.to_titlecase_string(); | ||||||
|  |         format.style = style.to_titlecase_string(); | ||||||
|  |         format.plurality = plurality.to_titlecase_string(); | ||||||
|  |         format.tense_or_number = locale_data.unique_strings.ensure(tense_or_number); | ||||||
|  |         format.pattern = locale_data.unique_strings.ensure(pattern.as_string()); | ||||||
|  | 
 | ||||||
|  |         locale.time_units.append(locale_data.unique_formats.ensure(move(format))); | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     fields_object.as_object().for_each_member([&](auto const& unit_and_style, auto const& patterns) { | ||||||
|  |         auto segments = unit_and_style.split_view('-'); | ||||||
|  |         auto unit = segments[0]; | ||||||
|  |         auto style = (segments.size() > 1) ? segments[1] : "long"sv; | ||||||
|  | 
 | ||||||
|  |         if (!is_sanctioned_unit(unit)) | ||||||
|  |             return; | ||||||
|  | 
 | ||||||
|  |         patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) { | ||||||
|  |             constexpr auto number_key = "relative-type-"sv; | ||||||
|  |             constexpr auto tense_key = "relativeTime-type-"sv; | ||||||
|  |             constexpr auto plurality_key = "relativeTimePattern-count-"sv; | ||||||
|  | 
 | ||||||
|  |             if (type.starts_with(number_key)) { | ||||||
|  |                 auto number = type.substring_view(number_key.length()); | ||||||
|  |                 parse_pattern(unit, style, "Other"sv, number, pattern_value); | ||||||
|  |             } else if (type.starts_with(tense_key)) { | ||||||
|  |                 pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) { | ||||||
|  |                     VERIFY(key.starts_with(plurality_key)); | ||||||
|  |                     auto plurality = key.substring_view(plurality_key.length()); | ||||||
|  |                     auto tense = type.substring_view(tense_key.length()); | ||||||
|  | 
 | ||||||
|  |                     parse_pattern(unit, style, plurality, tense, pattern); | ||||||
|  |                 }); | ||||||
|  |             } | ||||||
|  |         }); | ||||||
|  |     }); | ||||||
|  | 
 | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data) | static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data) | ||||||
| { | { | ||||||
|     auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); |     auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); | ||||||
|  | @ -50,7 +165,8 @@ static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& loc | ||||||
|         auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator)); |         auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator)); | ||||||
|         auto language = TRY(remove_variants_from_path(dates_path)); |         auto language = TRY(remove_variants_from_path(dates_path)); | ||||||
| 
 | 
 | ||||||
|         [[maybe_unused]] auto& locale = locale_data.locales.ensure(language); |         auto& locale = locale_data.locales.ensure(language); | ||||||
|  |         TRY(parse_date_fields(move(dates_path), locale_data, locale)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return {}; |     return {}; | ||||||
|  | @ -81,10 +197,14 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca | ||||||
|     StringBuilder builder; |     StringBuilder builder; | ||||||
|     SourceGenerator generator { builder }; |     SourceGenerator generator { builder }; | ||||||
|     generator.set("string_index_type"sv, s_string_index_type); |     generator.set("string_index_type"sv, s_string_index_type); | ||||||
|  |     generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type); | ||||||
| 
 | 
 | ||||||
|     generator.append(R"~~~( |     generator.append(R"~~~( | ||||||
| #include <AK/Array.h> | #include <AK/Array.h> | ||||||
| #include <AK/StringView.h> | #include <AK/StringView.h> | ||||||
|  | #include <AK/Vector.h> | ||||||
|  | #include <LibUnicode/Locale.h> | ||||||
|  | #include <LibUnicode/RelativeTimeFormat.h> | ||||||
| #include <LibUnicode/UnicodeRelativeTimeFormat.h> | #include <LibUnicode/UnicodeRelativeTimeFormat.h> | ||||||
| 
 | 
 | ||||||
| namespace Unicode { | namespace Unicode { | ||||||
|  | @ -93,6 +213,73 @@ namespace Unicode { | ||||||
|     locale_data.unique_strings.generate(generator); |     locale_data.unique_strings.generate(generator); | ||||||
| 
 | 
 | ||||||
|     generator.append(R"~~~( |     generator.append(R"~~~( | ||||||
|  | struct RelativeTimeFormatImpl { | ||||||
|  |     RelativeTimeFormat to_relative_time_format() const | ||||||
|  |     { | ||||||
|  |         RelativeTimeFormat relative_time_format {}; | ||||||
|  |         relative_time_format.plurality = plurality; | ||||||
|  |         relative_time_format.pattern = s_string_list[pattern]; | ||||||
|  | 
 | ||||||
|  |         return relative_time_format; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     TimeUnit time_unit; | ||||||
|  |     Style style; | ||||||
|  |     RelativeTimeFormat::Plurality plurality; | ||||||
|  |     @string_index_type@ tense_or_number { 0 }; | ||||||
|  |     @string_index_type@ pattern { 0 }; | ||||||
|  | }; | ||||||
|  | )~~~"); | ||||||
|  | 
 | ||||||
|  |     locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10); | ||||||
|  | 
 | ||||||
|  |     auto append_list = [&](String name, auto const& list) { | ||||||
|  |         generator.set("name", name); | ||||||
|  |         generator.set("size", String::number(list.size())); | ||||||
|  | 
 | ||||||
|  |         generator.append(R"~~~( | ||||||
|  | static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~"); | ||||||
|  | 
 | ||||||
|  |         bool first = true; | ||||||
|  |         for (auto index : list) { | ||||||
|  |             generator.append(first ? " " : ", "); | ||||||
|  |             generator.append(String::number(index)); | ||||||
|  |             first = false; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         generator.append(" } };"); | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}", nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); }); | ||||||
|  | 
 | ||||||
|  |     generator.append(R"~~~( | ||||||
|  | Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style) | ||||||
|  | { | ||||||
|  |     Vector<RelativeTimeFormat> formats; | ||||||
|  | 
 | ||||||
|  |     auto locale_value = locale_from_string(locale); | ||||||
|  |     if (!locale_value.has_value()) | ||||||
|  |         return formats; | ||||||
|  | 
 | ||||||
|  |     auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
 | ||||||
|  |     auto const& locale_formats = s_locale_relative_time_formats.at(locale_index); | ||||||
|  | 
 | ||||||
|  |     for (auto const& locale_format_index : locale_formats) { | ||||||
|  |         auto const& locale_format = s_relative_time_formats.at(locale_format_index); | ||||||
|  | 
 | ||||||
|  |         if (locale_format.time_unit != time_unit) | ||||||
|  |             continue; | ||||||
|  |         if (locale_format.style != style) | ||||||
|  |             continue; | ||||||
|  |         if (s_string_list[locale_format.tense_or_number] != tense_or_number) | ||||||
|  |             continue; | ||||||
|  | 
 | ||||||
|  |         formats.append(locale_format.to_relative_time_format()); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return formats; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } | } | ||||||
| )~~~"); | )~~~"); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -13,6 +13,7 @@ set(SOURCES | ||||||
|     DateTimeFormat.cpp |     DateTimeFormat.cpp | ||||||
|     Locale.cpp |     Locale.cpp | ||||||
|     NumberFormat.cpp |     NumberFormat.cpp | ||||||
|  |     RelativeTimeFormat.cpp | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| serenity_lib(LibUnicode unicode) | serenity_lib(LibUnicode unicode) | ||||||
|  |  | ||||||
							
								
								
									
										58
									
								
								Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,58 @@ | ||||||
|  | /*
 | ||||||
|  |  * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me> | ||||||
|  |  * | ||||||
|  |  * SPDX-License-Identifier: BSD-2-Clause | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include <LibUnicode/RelativeTimeFormat.h> | ||||||
|  | 
 | ||||||
|  | namespace Unicode { | ||||||
|  | 
 | ||||||
|  | Optional<TimeUnit> time_unit_from_string(StringView time_unit) | ||||||
|  | { | ||||||
|  |     if (time_unit == "second"sv) | ||||||
|  |         return TimeUnit::Second; | ||||||
|  |     if (time_unit == "minute"sv) | ||||||
|  |         return TimeUnit::Minute; | ||||||
|  |     if (time_unit == "hour"sv) | ||||||
|  |         return TimeUnit::Hour; | ||||||
|  |     if (time_unit == "day"sv) | ||||||
|  |         return TimeUnit::Day; | ||||||
|  |     if (time_unit == "week"sv) | ||||||
|  |         return TimeUnit::Week; | ||||||
|  |     if (time_unit == "month"sv) | ||||||
|  |         return TimeUnit::Month; | ||||||
|  |     if (time_unit == "quarter"sv) | ||||||
|  |         return TimeUnit::Quarter; | ||||||
|  |     if (time_unit == "year"sv) | ||||||
|  |         return TimeUnit::Year; | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | StringView time_unit_to_string(TimeUnit time_unit) | ||||||
|  | { | ||||||
|  |     switch (time_unit) { | ||||||
|  |     case TimeUnit::Second: | ||||||
|  |         return "second"sv; | ||||||
|  |     case TimeUnit::Minute: | ||||||
|  |         return "minute"sv; | ||||||
|  |     case TimeUnit::Hour: | ||||||
|  |         return "hour"sv; | ||||||
|  |     case TimeUnit::Day: | ||||||
|  |         return "day"sv; | ||||||
|  |     case TimeUnit::Week: | ||||||
|  |         return "week"sv; | ||||||
|  |     case TimeUnit::Month: | ||||||
|  |         return "month"sv; | ||||||
|  |     case TimeUnit::Quarter: | ||||||
|  |         return "quarter"sv; | ||||||
|  |     case TimeUnit::Year: | ||||||
|  |         return "year"sv; | ||||||
|  |     default: | ||||||
|  |         VERIFY_NOT_REACHED(); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Vector<RelativeTimeFormat> __attribute__((weak)) get_relative_time_format_patterns(StringView, TimeUnit, StringView, Style) { return {}; } | ||||||
|  | 
 | ||||||
|  | } | ||||||
							
								
								
									
										48
									
								
								Userland/Libraries/LibUnicode/RelativeTimeFormat.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								Userland/Libraries/LibUnicode/RelativeTimeFormat.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,48 @@ | ||||||
|  | /*
 | ||||||
|  |  * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me> | ||||||
|  |  * | ||||||
|  |  * SPDX-License-Identifier: BSD-2-Clause | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <AK/Optional.h> | ||||||
|  | #include <AK/StringView.h> | ||||||
|  | #include <AK/Vector.h> | ||||||
|  | #include <LibUnicode/Forward.h> | ||||||
|  | #include <LibUnicode/Locale.h> | ||||||
|  | 
 | ||||||
|  | namespace Unicode { | ||||||
|  | 
 | ||||||
// Only the subset of CLDR date fields that ECMA-402 requires is represented here.
// https://tc39.es/ecma402/#sec-singularrelativetimeunit
enum class TimeUnit {
    Second,
    Minute,
    Hour,
    Day,
    Week,
    Month,
    Quarter,
    Year,
};
|  | 
 | ||||||
|  | struct RelativeTimeFormat { | ||||||
|  |     enum class Plurality { | ||||||
|  |         Zero, | ||||||
|  |         One, | ||||||
|  |         Two, | ||||||
|  |         Few, | ||||||
|  |         Many, | ||||||
|  |         Other, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     Plurality plurality { Plurality::Other }; | ||||||
|  |     StringView pattern; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | Optional<TimeUnit> time_unit_from_string(StringView time_unit); | ||||||
|  | StringView time_unit_to_string(TimeUnit time_unit); | ||||||
|  | 
 | ||||||
|  | Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style); | ||||||
|  | 
 | ||||||
|  | } | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn