mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 20:32:44 +00:00 
			
		
		
		
	LibUnicode: Parse and generate relative-time format patterns
Relative-time format patterns are of one of two forms:
    * Tensed - refer to the past or the future, e.g. "N years ago" or
      "in N years".
    * Numbered - refer to a specific numeric value, e.g. "in 1 year"
      becomes "next year" and "in 0 years" becomes "this year".
In ECMA-402, tensed and numbered refer to the numeric formatting options
of "always" and "auto", respectively.
			
			
This commit is contained in:
		
							parent
							
								
									27eda77c97
								
							
						
					
					
						commit
						789f093b2e
					
				
					 4 changed files with 295 additions and 1 deletions
				
			
		|  | @ -17,18 +17,133 @@ | |||
| #include <LibCore/ArgsParser.h> | ||||
| #include <LibCore/DirIterator.h> | ||||
| #include <LibCore/File.h> | ||||
| #include <LibUnicode/Locale.h> | ||||
| #include <LibUnicode/RelativeTimeFormat.h> | ||||
| 
 | ||||
| using StringIndexType = u16; | ||||
| constexpr auto s_string_index_type = "u16"sv; | ||||
| 
 | ||||
| using RelativeTimeFormatIndexType = u16; | ||||
| constexpr auto s_relative_time_format_index_type = "u16"sv; | ||||
| 
 | ||||
| struct RelativeTimeFormat { | ||||
|     unsigned hash() const | ||||
|     { | ||||
|         auto hash = time_unit.hash(); | ||||
|         hash = pair_int_hash(hash, style.hash()); | ||||
|         hash = pair_int_hash(hash, plurality.hash()); | ||||
|         hash = pair_int_hash(hash, tense_or_number); | ||||
|         hash = pair_int_hash(hash, pattern); | ||||
|         return hash; | ||||
|     } | ||||
| 
 | ||||
|     bool operator==(RelativeTimeFormat const& other) const | ||||
|     { | ||||
|         return (time_unit == other.time_unit) | ||||
|             && (plurality == other.plurality) | ||||
|             && (style == other.style) | ||||
|             && (tense_or_number == other.tense_or_number) | ||||
|             && (pattern == other.pattern); | ||||
|     } | ||||
| 
 | ||||
|     String time_unit; | ||||
|     String style; | ||||
|     String plurality; | ||||
|     StringIndexType tense_or_number { 0 }; | ||||
|     StringIndexType pattern { 0 }; | ||||
| }; | ||||
| 
 | ||||
| template<> | ||||
| struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> { | ||||
|     ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format) | ||||
|     { | ||||
|         return Formatter<FormatString>::format(builder, | ||||
|             "{{ TimeUnit::{}, Style::{}, RelativeTimeFormat::Plurality::{}, {}, {} }}", | ||||
|             format.time_unit, | ||||
|             format.style, | ||||
|             format.plurality, | ||||
|             format.tense_or_number, | ||||
|             format.pattern); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| template<> | ||||
| struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> { | ||||
|     static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); } | ||||
| }; | ||||
| 
 | ||||
| struct Locale { | ||||
|     Vector<RelativeTimeFormatIndexType> time_units; | ||||
| }; | ||||
| 
 | ||||
| struct UnicodeLocaleData { | ||||
|     UniqueStringStorage<StringIndexType> unique_strings; | ||||
|     UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats; | ||||
| 
 | ||||
|     HashMap<String, Locale> locales; | ||||
| }; | ||||
| 
 | ||||
| static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale) | ||||
| { | ||||
|     LexicalPath date_fields_path(move(locale_dates_path)); | ||||
|     date_fields_path = date_fields_path.append("dateFields.json"sv); | ||||
| 
 | ||||
|     auto date_fields_file = TRY(Core::File::open(date_fields_path.string(), Core::OpenMode::ReadOnly)); | ||||
|     auto date_fields = TRY(JsonValue::from_string(date_fields_file->read_all())); | ||||
| 
 | ||||
|     auto const& main_object = date_fields.as_object().get("main"sv); | ||||
|     auto const& locale_object = main_object.as_object().get(date_fields_path.parent().basename()); | ||||
|     auto const& dates_object = locale_object.as_object().get("dates"sv); | ||||
|     auto const& fields_object = dates_object.as_object().get("fields"sv); | ||||
| 
 | ||||
|     auto is_sanctioned_unit = [](auto unit) { | ||||
|         // This is a copy of the time units sanctioned for use within ECMA-402.
 | ||||
|         // https://tc39.es/ecma402/#sec-singularrelativetimeunit
 | ||||
|         return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv); | ||||
|     }; | ||||
| 
 | ||||
|     auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) { | ||||
|         RelativeTimeFormat format {}; | ||||
|         format.time_unit = unit.to_titlecase_string(); | ||||
|         format.style = style.to_titlecase_string(); | ||||
|         format.plurality = plurality.to_titlecase_string(); | ||||
|         format.tense_or_number = locale_data.unique_strings.ensure(tense_or_number); | ||||
|         format.pattern = locale_data.unique_strings.ensure(pattern.as_string()); | ||||
| 
 | ||||
|         locale.time_units.append(locale_data.unique_formats.ensure(move(format))); | ||||
|     }; | ||||
| 
 | ||||
|     fields_object.as_object().for_each_member([&](auto const& unit_and_style, auto const& patterns) { | ||||
|         auto segments = unit_and_style.split_view('-'); | ||||
|         auto unit = segments[0]; | ||||
|         auto style = (segments.size() > 1) ? segments[1] : "long"sv; | ||||
| 
 | ||||
|         if (!is_sanctioned_unit(unit)) | ||||
|             return; | ||||
| 
 | ||||
|         patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) { | ||||
|             constexpr auto number_key = "relative-type-"sv; | ||||
|             constexpr auto tense_key = "relativeTime-type-"sv; | ||||
|             constexpr auto plurality_key = "relativeTimePattern-count-"sv; | ||||
| 
 | ||||
|             if (type.starts_with(number_key)) { | ||||
|                 auto number = type.substring_view(number_key.length()); | ||||
|                 parse_pattern(unit, style, "Other"sv, number, pattern_value); | ||||
|             } else if (type.starts_with(tense_key)) { | ||||
|                 pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) { | ||||
|                     VERIFY(key.starts_with(plurality_key)); | ||||
|                     auto plurality = key.substring_view(plurality_key.length()); | ||||
|                     auto tense = type.substring_view(tense_key.length()); | ||||
| 
 | ||||
|                     parse_pattern(unit, style, plurality, tense, pattern); | ||||
|                 }); | ||||
|             } | ||||
|         }); | ||||
|     }); | ||||
| 
 | ||||
|     return {}; | ||||
| } | ||||
| 
 | ||||
| static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data) | ||||
| { | ||||
|     auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); | ||||
|  | @ -50,7 +165,8 @@ static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& loc | |||
|         auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator)); | ||||
|         auto language = TRY(remove_variants_from_path(dates_path)); | ||||
| 
 | ||||
|         [[maybe_unused]] auto& locale = locale_data.locales.ensure(language); | ||||
|         auto& locale = locale_data.locales.ensure(language); | ||||
|         TRY(parse_date_fields(move(dates_path), locale_data, locale)); | ||||
|     } | ||||
| 
 | ||||
|     return {}; | ||||
|  | @ -81,10 +197,14 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca | |||
|     StringBuilder builder; | ||||
|     SourceGenerator generator { builder }; | ||||
|     generator.set("string_index_type"sv, s_string_index_type); | ||||
|     generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type); | ||||
| 
 | ||||
|     generator.append(R"~~~( | ||||
| #include <AK/Array.h> | ||||
| #include <AK/StringView.h> | ||||
| #include <AK/Vector.h> | ||||
| #include <LibUnicode/Locale.h> | ||||
| #include <LibUnicode/RelativeTimeFormat.h> | ||||
| #include <LibUnicode/UnicodeRelativeTimeFormat.h> | ||||
| 
 | ||||
| namespace Unicode { | ||||
|  | @ -93,6 +213,73 @@ namespace Unicode { | |||
|     locale_data.unique_strings.generate(generator); | ||||
| 
 | ||||
|     generator.append(R"~~~( | ||||
| struct RelativeTimeFormatImpl { | ||||
|     RelativeTimeFormat to_relative_time_format() const | ||||
|     { | ||||
|         RelativeTimeFormat relative_time_format {}; | ||||
|         relative_time_format.plurality = plurality; | ||||
|         relative_time_format.pattern = s_string_list[pattern]; | ||||
| 
 | ||||
|         return relative_time_format; | ||||
|     } | ||||
| 
 | ||||
|     TimeUnit time_unit; | ||||
|     Style style; | ||||
|     RelativeTimeFormat::Plurality plurality; | ||||
|     @string_index_type@ tense_or_number { 0 }; | ||||
|     @string_index_type@ pattern { 0 }; | ||||
| }; | ||||
| )~~~"); | ||||
| 
 | ||||
|     locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10); | ||||
| 
 | ||||
|     auto append_list = [&](String name, auto const& list) { | ||||
|         generator.set("name", name); | ||||
|         generator.set("size", String::number(list.size())); | ||||
| 
 | ||||
|         generator.append(R"~~~( | ||||
| static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~"); | ||||
| 
 | ||||
|         bool first = true; | ||||
|         for (auto index : list) { | ||||
|             generator.append(first ? " " : ", "); | ||||
|             generator.append(String::number(index)); | ||||
|             first = false; | ||||
|         } | ||||
| 
 | ||||
|         generator.append(" } };"); | ||||
|     }; | ||||
| 
 | ||||
|     generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}", nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); }); | ||||
| 
 | ||||
|     generator.append(R"~~~( | ||||
| Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style) | ||||
| { | ||||
|     Vector<RelativeTimeFormat> formats; | ||||
| 
 | ||||
|     auto locale_value = locale_from_string(locale); | ||||
|     if (!locale_value.has_value()) | ||||
|         return formats; | ||||
| 
 | ||||
|     auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
 | ||||
|     auto const& locale_formats = s_locale_relative_time_formats.at(locale_index); | ||||
| 
 | ||||
|     for (auto const& locale_format_index : locale_formats) { | ||||
|         auto const& locale_format = s_relative_time_formats.at(locale_format_index); | ||||
| 
 | ||||
|         if (locale_format.time_unit != time_unit) | ||||
|             continue; | ||||
|         if (locale_format.style != style) | ||||
|             continue; | ||||
|         if (s_string_list[locale_format.tense_or_number] != tense_or_number) | ||||
|             continue; | ||||
| 
 | ||||
|         formats.append(locale_format.to_relative_time_format()); | ||||
|     } | ||||
| 
 | ||||
|     return formats; | ||||
| } | ||||
| 
 | ||||
| } | ||||
| )~~~"); | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,6 +13,7 @@ set(SOURCES | |||
|     DateTimeFormat.cpp | ||||
|     Locale.cpp | ||||
|     NumberFormat.cpp | ||||
|     RelativeTimeFormat.cpp | ||||
| ) | ||||
| 
 | ||||
| serenity_lib(LibUnicode unicode) | ||||
|  |  | |||
							
								
								
									
										58
									
								
								Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,58 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me> | ||||
|  * | ||||
|  * SPDX-License-Identifier: BSD-2-Clause | ||||
|  */ | ||||
| 
 | ||||
| #include <LibUnicode/RelativeTimeFormat.h> | ||||
| 
 | ||||
| namespace Unicode { | ||||
| 
 | ||||
| Optional<TimeUnit> time_unit_from_string(StringView time_unit) | ||||
| { | ||||
|     if (time_unit == "second"sv) | ||||
|         return TimeUnit::Second; | ||||
|     if (time_unit == "minute"sv) | ||||
|         return TimeUnit::Minute; | ||||
|     if (time_unit == "hour"sv) | ||||
|         return TimeUnit::Hour; | ||||
|     if (time_unit == "day"sv) | ||||
|         return TimeUnit::Day; | ||||
|     if (time_unit == "week"sv) | ||||
|         return TimeUnit::Week; | ||||
|     if (time_unit == "month"sv) | ||||
|         return TimeUnit::Month; | ||||
|     if (time_unit == "quarter"sv) | ||||
|         return TimeUnit::Quarter; | ||||
|     if (time_unit == "year"sv) | ||||
|         return TimeUnit::Year; | ||||
|     return {}; | ||||
| } | ||||
| 
 | ||||
| StringView time_unit_to_string(TimeUnit time_unit) | ||||
| { | ||||
|     switch (time_unit) { | ||||
|     case TimeUnit::Second: | ||||
|         return "second"sv; | ||||
|     case TimeUnit::Minute: | ||||
|         return "minute"sv; | ||||
|     case TimeUnit::Hour: | ||||
|         return "hour"sv; | ||||
|     case TimeUnit::Day: | ||||
|         return "day"sv; | ||||
|     case TimeUnit::Week: | ||||
|         return "week"sv; | ||||
|     case TimeUnit::Month: | ||||
|         return "month"sv; | ||||
|     case TimeUnit::Quarter: | ||||
|         return "quarter"sv; | ||||
|     case TimeUnit::Year: | ||||
|         return "year"sv; | ||||
|     default: | ||||
|         VERIFY_NOT_REACHED(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| Vector<RelativeTimeFormat> __attribute__((weak)) get_relative_time_format_patterns(StringView, TimeUnit, StringView, Style) { return {}; } | ||||
| 
 | ||||
| } | ||||
							
								
								
									
										48
									
								
								Userland/Libraries/LibUnicode/RelativeTimeFormat.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								Userland/Libraries/LibUnicode/RelativeTimeFormat.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,48 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me> | ||||
|  * | ||||
|  * SPDX-License-Identifier: BSD-2-Clause | ||||
|  */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <AK/Optional.h> | ||||
| #include <AK/StringView.h> | ||||
| #include <AK/Vector.h> | ||||
| #include <LibUnicode/Forward.h> | ||||
| #include <LibUnicode/Locale.h> | ||||
| 
 | ||||
| namespace Unicode { | ||||
| 
 | ||||
// These are just the subset of fields in the CLDR required for ECMA-402.
// Enumerators are listed from the smallest unit to the largest.
enum class TimeUnit {
    Second,
    Minute,
    Hour,
    Day,
    Week,
    Month,
    Quarter,
    Year,
};
| 
 | ||||
| struct RelativeTimeFormat { | ||||
|     enum class Plurality { | ||||
|         Zero, | ||||
|         One, | ||||
|         Two, | ||||
|         Few, | ||||
|         Many, | ||||
|         Other, | ||||
|     }; | ||||
| 
 | ||||
|     Plurality plurality { Plurality::Other }; | ||||
|     StringView pattern; | ||||
| }; | ||||
| 
 | ||||
| Optional<TimeUnit> time_unit_from_string(StringView time_unit); | ||||
| StringView time_unit_to_string(TimeUnit time_unit); | ||||
| 
 | ||||
| Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style); | ||||
| 
 | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn