mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 06:58:11 +00:00
LibUnicode: Parse and generate relative-time format patterns
Relative-time format patterns take one of two forms:

* Tensed - refers to the past or the future, e.g. "N years ago" or "in N years".
* Numbered - refers to a specific numeric value, e.g. "in 1 year" becomes "next year" and "in 0 years" becomes "this year".

In ECMA-402, tensed and numbered refer to the numeric formatting options of "always" and "auto", respectively.
This commit is contained in:
parent
27eda77c97
commit
789f093b2e
4 changed files with 295 additions and 1 deletions
|
@ -17,18 +17,133 @@
|
||||||
#include <LibCore/ArgsParser.h>
|
#include <LibCore/ArgsParser.h>
|
||||||
#include <LibCore/DirIterator.h>
|
#include <LibCore/DirIterator.h>
|
||||||
#include <LibCore/File.h>
|
#include <LibCore/File.h>
|
||||||
|
#include <LibUnicode/Locale.h>
|
||||||
|
#include <LibUnicode/RelativeTimeFormat.h>
|
||||||
|
|
||||||
using StringIndexType = u16;
|
using StringIndexType = u16;
|
||||||
constexpr auto s_string_index_type = "u16"sv;
|
constexpr auto s_string_index_type = "u16"sv;
|
||||||
|
|
||||||
|
using RelativeTimeFormatIndexType = u16;
|
||||||
|
constexpr auto s_relative_time_format_index_type = "u16"sv;
|
||||||
|
|
||||||
|
struct RelativeTimeFormat {
|
||||||
|
unsigned hash() const
|
||||||
|
{
|
||||||
|
auto hash = time_unit.hash();
|
||||||
|
hash = pair_int_hash(hash, style.hash());
|
||||||
|
hash = pair_int_hash(hash, plurality.hash());
|
||||||
|
hash = pair_int_hash(hash, tense_or_number);
|
||||||
|
hash = pair_int_hash(hash, pattern);
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(RelativeTimeFormat const& other) const
|
||||||
|
{
|
||||||
|
return (time_unit == other.time_unit)
|
||||||
|
&& (plurality == other.plurality)
|
||||||
|
&& (style == other.style)
|
||||||
|
&& (tense_or_number == other.tense_or_number)
|
||||||
|
&& (pattern == other.pattern);
|
||||||
|
}
|
||||||
|
|
||||||
|
String time_unit;
|
||||||
|
String style;
|
||||||
|
String plurality;
|
||||||
|
StringIndexType tense_or_number { 0 };
|
||||||
|
StringIndexType pattern { 0 };
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> {
|
||||||
|
ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format)
|
||||||
|
{
|
||||||
|
return Formatter<FormatString>::format(builder,
|
||||||
|
"{{ TimeUnit::{}, Style::{}, RelativeTimeFormat::Plurality::{}, {}, {} }}",
|
||||||
|
format.time_unit,
|
||||||
|
format.style,
|
||||||
|
format.plurality,
|
||||||
|
format.tense_or_number,
|
||||||
|
format.pattern);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> {
|
||||||
|
static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); }
|
||||||
|
};
|
||||||
|
|
||||||
struct Locale {
|
struct Locale {
|
||||||
|
Vector<RelativeTimeFormatIndexType> time_units;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct UnicodeLocaleData {
|
struct UnicodeLocaleData {
|
||||||
UniqueStringStorage<StringIndexType> unique_strings;
|
UniqueStringStorage<StringIndexType> unique_strings;
|
||||||
|
UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats;
|
||||||
|
|
||||||
HashMap<String, Locale> locales;
|
HashMap<String, Locale> locales;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Parses "<locale_dates_path>/dateFields.json" and appends every relative-time pattern
// found under main.<locale>.dates.fields to `locale`.
//
// Each member of "fields" is keyed "<unit>" or "<unit>-<style>"; a missing style is
// treated as "long". Units that are not sanctioned by ECMA-402 are ignored. Within a
// field, two kinds of member are read:
//   - "relative-type-<number>": a single pattern, recorded with plurality "Other".
//   - "relativeTime-type-<tense>": an object whose members are keyed
//     "relativeTimePattern-count-<plurality>", each holding one pattern.
// Any other member is skipped.
//
// Returns an error if the file cannot be opened or is not valid JSON.
static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale)
{
    LexicalPath const date_fields_path = LexicalPath(move(locale_dates_path)).append("dateFields.json"sv);

    auto file = TRY(Core::File::open(date_fields_path.string(), Core::OpenMode::ReadOnly));
    auto json = TRY(JsonValue::from_string(file->read_all()));

    // The locale key inside "main" is the name of the directory containing dateFields.json.
    auto const& main_value = json.as_object().get("main"sv);
    auto const& locale_value = main_value.as_object().get(date_fields_path.parent().basename());
    auto const& dates_value = locale_value.as_object().get("dates"sv);
    auto const& fields_value = dates_value.as_object().get("fields"sv);

    // This is a copy of the time units sanctioned for use within ECMA-402.
    // https://tc39.es/ecma402/#sec-singularrelativetimeunit
    auto is_sanctioned_unit = [](auto candidate) {
        return candidate.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
    };

    auto record_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
        // Store the tense/number key before the pattern text, so that string indices
        // are handed out in a stable order.
        auto const tense_or_number_index = locale_data.unique_strings.ensure(tense_or_number);
        auto const pattern_index = locale_data.unique_strings.ensure(pattern.as_string());

        RelativeTimeFormat format {};
        format.time_unit = unit.to_titlecase_string();
        format.style = style.to_titlecase_string();
        format.plurality = plurality.to_titlecase_string();
        format.tense_or_number = tense_or_number_index;
        format.pattern = pattern_index;

        locale.time_units.append(locale_data.unique_formats.ensure(move(format)));
    };

    fields_value.as_object().for_each_member([&](auto const& field_name, auto const& field_value) {
        auto parts = field_name.split_view('-');
        auto unit = parts[0];

        auto style = "long"sv;
        if (parts.size() > 1)
            style = parts[1];

        if (!is_sanctioned_unit(unit))
            return;

        field_value.as_object().for_each_member([&](auto const& type, auto const& type_value) {
            constexpr auto number_prefix = "relative-type-"sv;
            constexpr auto tense_prefix = "relativeTime-type-"sv;
            constexpr auto plurality_prefix = "relativeTimePattern-count-"sv;

            if (type.starts_with(number_prefix)) {
                // Numbered patterns carry no plural category of their own.
                record_pattern(unit, style, "Other"sv, type.substring_view(number_prefix.length()), type_value);
                return;
            }

            if (!type.starts_with(tense_prefix))
                return;

            auto const tense = type.substring_view(tense_prefix.length());

            type_value.as_object().for_each_member([&](auto const& key, auto const& pattern) {
                VERIFY(key.starts_with(plurality_prefix));
                record_pattern(unit, style, key.substring_view(plurality_prefix.length()), tense, pattern);
            });
        });
    });

    return {};
}
|
||||||
|
|
||||||
static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
|
static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
|
||||||
{
|
{
|
||||||
auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
|
auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
|
||||||
|
@ -50,7 +165,8 @@ static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& loc
|
||||||
auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
|
auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
|
||||||
auto language = TRY(remove_variants_from_path(dates_path));
|
auto language = TRY(remove_variants_from_path(dates_path));
|
||||||
|
|
||||||
[[maybe_unused]] auto& locale = locale_data.locales.ensure(language);
|
auto& locale = locale_data.locales.ensure(language);
|
||||||
|
TRY(parse_date_fields(move(dates_path), locale_data, locale));
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
@ -81,10 +197,14 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
SourceGenerator generator { builder };
|
SourceGenerator generator { builder };
|
||||||
generator.set("string_index_type"sv, s_string_index_type);
|
generator.set("string_index_type"sv, s_string_index_type);
|
||||||
|
generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type);
|
||||||
|
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
#include <AK/Array.h>
|
#include <AK/Array.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
#include <AK/Vector.h>
|
||||||
|
#include <LibUnicode/Locale.h>
|
||||||
|
#include <LibUnicode/RelativeTimeFormat.h>
|
||||||
#include <LibUnicode/UnicodeRelativeTimeFormat.h>
|
#include <LibUnicode/UnicodeRelativeTimeFormat.h>
|
||||||
|
|
||||||
namespace Unicode {
|
namespace Unicode {
|
||||||
|
@ -93,6 +213,73 @@ namespace Unicode {
|
||||||
locale_data.unique_strings.generate(generator);
|
locale_data.unique_strings.generate(generator);
|
||||||
|
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
|
struct RelativeTimeFormatImpl {
|
||||||
|
RelativeTimeFormat to_relative_time_format() const
|
||||||
|
{
|
||||||
|
RelativeTimeFormat relative_time_format {};
|
||||||
|
relative_time_format.plurality = plurality;
|
||||||
|
relative_time_format.pattern = s_string_list[pattern];
|
||||||
|
|
||||||
|
return relative_time_format;
|
||||||
|
}
|
||||||
|
|
||||||
|
TimeUnit time_unit;
|
||||||
|
Style style;
|
||||||
|
RelativeTimeFormat::Plurality plurality;
|
||||||
|
@string_index_type@ tense_or_number { 0 };
|
||||||
|
@string_index_type@ pattern { 0 };
|
||||||
|
};
|
||||||
|
)~~~");
|
||||||
|
|
||||||
|
locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10);
|
||||||
|
|
||||||
|
auto append_list = [&](String name, auto const& list) {
|
||||||
|
generator.set("name", name);
|
||||||
|
generator.set("size", String::number(list.size()));
|
||||||
|
|
||||||
|
generator.append(R"~~~(
|
||||||
|
static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
for (auto index : list) {
|
||||||
|
generator.append(first ? " " : ", ");
|
||||||
|
generator.append(String::number(index));
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
generator.append(" } };");
|
||||||
|
};
|
||||||
|
|
||||||
|
generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}", nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); });
|
||||||
|
|
||||||
|
generator.append(R"~~~(
|
||||||
|
Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
|
||||||
|
{
|
||||||
|
Vector<RelativeTimeFormat> formats;
|
||||||
|
|
||||||
|
auto locale_value = locale_from_string(locale);
|
||||||
|
if (!locale_value.has_value())
|
||||||
|
return formats;
|
||||||
|
|
||||||
|
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
|
||||||
|
auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);
|
||||||
|
|
||||||
|
for (auto const& locale_format_index : locale_formats) {
|
||||||
|
auto const& locale_format = s_relative_time_formats.at(locale_format_index);
|
||||||
|
|
||||||
|
if (locale_format.time_unit != time_unit)
|
||||||
|
continue;
|
||||||
|
if (locale_format.style != style)
|
||||||
|
continue;
|
||||||
|
if (s_string_list[locale_format.tense_or_number] != tense_or_number)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
formats.append(locale_format.to_relative_time_format());
|
||||||
|
}
|
||||||
|
|
||||||
|
return formats;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
)~~~");
|
)~~~");
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ set(SOURCES
|
||||||
DateTimeFormat.cpp
|
DateTimeFormat.cpp
|
||||||
Locale.cpp
|
Locale.cpp
|
||||||
NumberFormat.cpp
|
NumberFormat.cpp
|
||||||
|
RelativeTimeFormat.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
serenity_lib(LibUnicode unicode)
|
serenity_lib(LibUnicode unicode)
|
||||||
|
|
58
Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp
Normal file
58
Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Tim Flynn <trflynn89@pm.me>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <LibUnicode/RelativeTimeFormat.h>
|
||||||
|
|
||||||
|
namespace Unicode {
|
||||||
|
|
||||||
|
// Converts a lowercase singular unit name ("second", "minute", "hour", "day", "week",
// "month", "quarter" or "year") to its TimeUnit. The match is exact; any other input
// yields an empty Optional.
Optional<TimeUnit> time_unit_from_string(StringView time_unit)
{
    struct NamedUnit {
        StringView name;
        TimeUnit unit;
    };

    static constexpr NamedUnit known_units[] = {
        { "second"sv, TimeUnit::Second },
        { "minute"sv, TimeUnit::Minute },
        { "hour"sv, TimeUnit::Hour },
        { "day"sv, TimeUnit::Day },
        { "week"sv, TimeUnit::Week },
        { "month"sv, TimeUnit::Month },
        { "quarter"sv, TimeUnit::Quarter },
        { "year"sv, TimeUnit::Year },
    };

    for (auto const& candidate : known_units) {
        if (time_unit == candidate.name)
            return candidate.unit;
    }

    return {};
}
|
||||||
|
|
||||||
|
// Returns the lowercase singular name of `time_unit` ("second", "minute", ...), i.e.
// the inverse of time_unit_from_string().
//
// A value that is not a declared enumerator trips VERIFY_NOT_REACHED().
StringView time_unit_to_string(TimeUnit time_unit)
{
    // Deliberately no `default` label: with every enumerator listed explicitly, the
    // compiler's -Wswitch diagnostic flags this switch if TimeUnit ever gains a value
    // that is not handled here.
    switch (time_unit) {
    case TimeUnit::Second:
        return "second"sv;
    case TimeUnit::Minute:
        return "minute"sv;
    case TimeUnit::Hour:
        return "hour"sv;
    case TimeUnit::Day:
        return "day"sv;
    case TimeUnit::Week:
        return "week"sv;
    case TimeUnit::Month:
        return "month"sv;
    case TimeUnit::Quarter:
        return "quarter"sv;
    case TimeUnit::Year:
        return "year"sv;
    }

    VERIFY_NOT_REACHED();
}
|
||||||
|
|
||||||
|
// Weak fallback definition that reports no patterns for any input.
// NOTE(review): presumably superseded at link time by the strong definition emitted
// into the generated UnicodeRelativeTimeFormat source - confirm against the build.
Vector<RelativeTimeFormat> __attribute__((weak)) get_relative_time_format_patterns(StringView, TimeUnit, StringView, Style)
{
    return {};
}
|
||||||
|
|
||||||
|
}
|
48
Userland/Libraries/LibUnicode/RelativeTimeFormat.h
Normal file
48
Userland/Libraries/LibUnicode/RelativeTimeFormat.h
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Tim Flynn <trflynn89@pm.me>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/Optional.h>
|
||||||
|
#include <AK/StringView.h>
|
||||||
|
#include <AK/Vector.h>
|
||||||
|
#include <LibUnicode/Forward.h>
|
||||||
|
#include <LibUnicode/Locale.h>
|
||||||
|
|
||||||
|
namespace Unicode {
|
||||||
|
|
||||||
|
// These are just the subset of fields in the CLDR required for ECMA-402.
// Each enumerator has a lowercase string form; see time_unit_from_string() and
// time_unit_to_string().
enum class TimeUnit {
    Second,
    Minute,
    Hour,
    Day,
    Week,
    Month,
    Quarter,
    Year,
};
|
||||||
|
|
||||||
|
struct RelativeTimeFormat {
|
||||||
|
enum class Plurality {
|
||||||
|
Zero,
|
||||||
|
One,
|
||||||
|
Two,
|
||||||
|
Few,
|
||||||
|
Many,
|
||||||
|
Other,
|
||||||
|
};
|
||||||
|
|
||||||
|
Plurality plurality { Plurality::Other };
|
||||||
|
StringView pattern;
|
||||||
|
};
|
||||||
|
|
||||||
|
Optional<TimeUnit> time_unit_from_string(StringView time_unit);
|
||||||
|
StringView time_unit_to_string(TimeUnit time_unit);
|
||||||
|
|
||||||
|
Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style);
|
||||||
|
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue