mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 00:27:43 +00:00
LibUnicode: Move CLDR data generators to a LibLocale subfolder
To prepare for placing all CLDR generated data in a new library, LibLocale, this moves the code generators for the CLDR data to the LibLocale subfolder.
This commit is contained in:
parent
e3e0602833
commit
89d1813b5d
17 changed files with 174 additions and 158 deletions
|
@ -1,6 +1 @@
|
|||
# Unicode/CLDR code generator tools: one lagom_tool() call per generator, each naming the tool,
# its single source file (SOURCES) and the libraries it links against (LIBS).
# NOTE(review): lagom_tool() is defined elsewhere in the build system; presumably it declares a
# host-side executable -- confirm against its definition.
lagom_tool(GenerateUnicodeData SOURCES GenerateUnicodeData.cpp LIBS LibMain)
|
||||
lagom_tool(GenerateUnicodeDateTimeFormat SOURCES GenerateUnicodeDateTimeFormat.cpp LIBS LibMain LibTimeZone)
|
||||
lagom_tool(GenerateUnicodeLocale SOURCES GenerateUnicodeLocale.cpp LIBS LibMain)
|
||||
lagom_tool(GenerateUnicodeNumberFormat SOURCES GenerateUnicodeNumberFormat.cpp LIBS LibMain)
|
||||
lagom_tool(GenerateUnicodePluralRules SOURCES GenerateUnicodePluralRules.cpp LIBS LibMain)
|
||||
lagom_tool(GenerateUnicodeRelativeTimeFormat SOURCES GenerateUnicodeRelativeTimeFormat.cpp LIBS LibMain)
|
||||
|
|
|
@ -687,7 +687,7 @@ enum class @name@ : @underlying@ {)~~~");
|
|||
|
||||
#include <AK/Types.h>
|
||||
#include <LibUnicode/Forward.h>
|
||||
#include <LibUnicode/UnicodeLocale.h>
|
||||
#include <LibUnicode/LocaleData.h>
|
||||
|
||||
namespace Unicode {
|
||||
)~~~");
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -1,689 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "GeneratorUtil.h"
|
||||
#include <AK/Format.h>
|
||||
#include <AK/JsonObject.h>
|
||||
#include <AK/JsonParser.h>
|
||||
#include <AK/JsonValue.h>
|
||||
#include <AK/LexicalPath.h>
|
||||
#include <AK/SourceGenerator.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/Variant.h>
|
||||
#include <LibCore/ArgsParser.h>
|
||||
#include <LibCore/File.h>
|
||||
#include <LibCore/Stream.h>
|
||||
#include <LibUnicode/PluralRules.h>
|
||||
|
||||
using StringIndexType = u16;
|
||||
|
||||
// Turns a CLDR identifier into a name that can be emitted as a C++ identifier:
//  - every '-' becomes '_';
//  - an identifier made up only of ASCII digits is prefixed with the first character of `owner`
//    and an underscore;
//  - otherwise, a leading lowercase ASCII letter is uppercased.
// Any other identifier is returned with only the '-' replacement applied.
static String format_identifier(StringView owner, String identifier)
{
    auto sanitized = identifier.replace("-"sv, "_"sv, ReplaceMode::All);

    bool const is_numeric = all_of(sanitized, is_ascii_digit);
    if (is_numeric)
        return String::formatted("{}_{}", owner[0], sanitized);

    auto const first_character = sanitized[0];
    if (!is_ascii_lower_alpha(first_character))
        return sanitized;

    return String::formatted("{:c}{}", to_ascii_uppercase(first_character), sanitized.substring_view(1));
}
|
||||
|
||||
struct Relation {
|
||||
using Range = Array<u32, 2>;
|
||||
using Comparator = Variant<u32, Range>;
|
||||
|
||||
enum class Type {
|
||||
Equality,
|
||||
Inequality,
|
||||
};
|
||||
|
||||
String const& modulus_variable_name() const
|
||||
{
|
||||
VERIFY(modulus.has_value());
|
||||
|
||||
if (!cached_modulus_variable_name.has_value())
|
||||
cached_modulus_variable_name = String::formatted("mod_{}_{}", symbol, *modulus);
|
||||
|
||||
return *cached_modulus_variable_name;
|
||||
}
|
||||
|
||||
String const& exponential_variable_name() const
|
||||
{
|
||||
if (!cached_exponential_variable_name.has_value())
|
||||
cached_exponential_variable_name = String::formatted("exp_{}", symbol);
|
||||
|
||||
return *cached_exponential_variable_name;
|
||||
}
|
||||
|
||||
void generate_relation(SourceGenerator& generator) const
|
||||
{
|
||||
auto append_variable_name = [&]() {
|
||||
if (modulus.has_value())
|
||||
generator.append(modulus_variable_name());
|
||||
else if (symbol == 'e' || symbol == 'c')
|
||||
generator.append(exponential_variable_name());
|
||||
else
|
||||
generator.append(String::formatted("ops.{}", Unicode::PluralOperands::symbol_to_variable_name(symbol)));
|
||||
};
|
||||
|
||||
auto append_value = [&](u32 value) {
|
||||
append_variable_name();
|
||||
generator.append(" == "sv);
|
||||
generator.append(String::number(value));
|
||||
};
|
||||
|
||||
auto append_range = [&](auto const& range) {
|
||||
// This check avoids generating "0 <= unsigned_value", which is always true.
|
||||
if (range[0] != 0 || Unicode::PluralOperands::symbol_requires_floating_point_modulus(symbol)) {
|
||||
generator.append(String::formatted("{} <= ", range[0]));
|
||||
append_variable_name();
|
||||
generator.append(" && "sv);
|
||||
}
|
||||
|
||||
append_variable_name();
|
||||
generator.append(String::formatted(" <= {}", range[1]));
|
||||
};
|
||||
|
||||
if (type == Type::Inequality)
|
||||
generator.append("!"sv);
|
||||
|
||||
generator.append("("sv);
|
||||
|
||||
bool first = true;
|
||||
for (auto const& comparator : comparators) {
|
||||
generator.append(first ? "("sv : " || ("sv);
|
||||
|
||||
comparator.visit(
|
||||
[&](u32 value) { append_value(value); },
|
||||
[&](Range const& range) { append_range(range); });
|
||||
|
||||
generator.append(")"sv);
|
||||
first = false;
|
||||
}
|
||||
|
||||
generator.append(")"sv);
|
||||
}
|
||||
|
||||
void generate_precomputed_variables(SourceGenerator& generator, HashTable<String>& generated_variables) const
|
||||
{
|
||||
// FIXME: How do we handle the exponential symbols? They seem unused by ECMA-402.
|
||||
if (symbol == 'e' || symbol == 'c') {
|
||||
if (auto variable = exponential_variable_name(); !generated_variables.contains(variable)) {
|
||||
generated_variables.set(variable);
|
||||
generator.set("variable"sv, move(variable));
|
||||
generator.append(R"~~~(
|
||||
auto @variable@ = 0;)~~~");
|
||||
}
|
||||
}
|
||||
|
||||
if (!modulus.has_value())
|
||||
return;
|
||||
|
||||
auto variable = modulus_variable_name();
|
||||
if (generated_variables.contains(variable))
|
||||
return;
|
||||
|
||||
generated_variables.set(variable);
|
||||
generator.set("variable"sv, move(variable));
|
||||
generator.set("operand"sv, Unicode::PluralOperands::symbol_to_variable_name(symbol));
|
||||
generator.set("modulus"sv, String::number(*modulus));
|
||||
|
||||
if (Unicode::PluralOperands::symbol_requires_floating_point_modulus(symbol)) {
|
||||
generator.append(R"~~~(
|
||||
auto @variable@ = fmod(ops.@operand@, @modulus@);)~~~");
|
||||
} else {
|
||||
generator.append(R"~~~(
|
||||
auto @variable@ = ops.@operand@ % @modulus@;)~~~");
|
||||
}
|
||||
}
|
||||
|
||||
Type type;
|
||||
char symbol { 0 };
|
||||
Optional<u32> modulus;
|
||||
Vector<Comparator> comparators;
|
||||
|
||||
private:
|
||||
mutable Optional<String> cached_modulus_variable_name;
|
||||
mutable Optional<String> cached_exponential_variable_name;
|
||||
};
|
||||
|
||||
struct Condition {
|
||||
void generate_condition(SourceGenerator& generator) const
|
||||
{
|
||||
for (size_t i = 0; i < relations.size(); ++i) {
|
||||
if (i > 0)
|
||||
generator.append(" || "sv);
|
||||
|
||||
auto const& conjunctions = relations[i];
|
||||
if (conjunctions.size() > 1)
|
||||
generator.append("("sv);
|
||||
|
||||
for (size_t j = 0; j < conjunctions.size(); ++j) {
|
||||
if (j > 0)
|
||||
generator.append(" && "sv);
|
||||
conjunctions[j].generate_relation(generator);
|
||||
}
|
||||
|
||||
if (conjunctions.size() > 1)
|
||||
generator.append(")"sv);
|
||||
}
|
||||
}
|
||||
|
||||
void generate_precomputed_variables(SourceGenerator& generator, HashTable<String>& generated_variables) const
|
||||
{
|
||||
for (auto const& conjunctions : relations) {
|
||||
for (auto const& relation : conjunctions)
|
||||
relation.generate_precomputed_variables(generator, generated_variables);
|
||||
}
|
||||
}
|
||||
|
||||
Vector<Vector<Relation>> relations;
|
||||
};
|
||||
|
||||
struct Range {
|
||||
String start;
|
||||
String end;
|
||||
String category;
|
||||
};
|
||||
|
||||
using Conditions = HashMap<String, Condition>;
|
||||
using Ranges = Vector<Range>;
|
||||
|
||||
struct Locale {
|
||||
static String generated_method_name(StringView form, StringView locale)
|
||||
{
|
||||
return String::formatted("{}_plurality_{}", form, format_identifier({}, locale));
|
||||
}
|
||||
|
||||
Conditions& rules_for_form(StringView form)
|
||||
{
|
||||
if (form == "cardinal")
|
||||
return cardinal_rules;
|
||||
if (form == "ordinal")
|
||||
return ordinal_rules;
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
Conditions cardinal_rules;
|
||||
Conditions ordinal_rules;
|
||||
Ranges plural_ranges;
|
||||
};
|
||||
|
||||
struct UnicodeLocaleData {
|
||||
UniqueStringStorage<StringIndexType> unique_strings;
|
||||
|
||||
HashMap<String, Locale> locales;
|
||||
};
|
||||
|
||||
// Parses a single relation such as "n % 100 != 11..14,20" into a Relation.
// The text must contain " = " or " != "; the left-hand side is a one-character operand symbol,
// optionally followed by " % <modulus>"; the right-hand side is a comma-separated list of
// unsigned values and "a..b" ranges. Malformed input trips a VERIFY.
static Relation parse_relation(StringView relation)
{
    static constexpr auto equality_operator = " = "sv;
    static constexpr auto inequality_operator = " != "sv;
    static constexpr auto modulus_operator = " % "sv;
    static constexpr auto range_operator = ".."sv;
    static constexpr auto set_operator = ',';

    Relation parsed;

    StringView lhs;
    StringView rhs;

    // Splits the relation text around the comparison operator found at `operator_index`.
    auto split_around = [&](size_t operator_index, StringView comparison_operator) {
        lhs = relation.substring_view(0, operator_index);
        rhs = relation.substring_view(operator_index + comparison_operator.length());
    };

    if (auto equality_index = relation.find(equality_operator); equality_index.has_value()) {
        parsed.type = Relation::Type::Equality;
        split_around(*equality_index, equality_operator);
    } else if (auto inequality_index = relation.find(inequality_operator); inequality_index.has_value()) {
        parsed.type = Relation::Type::Inequality;
        split_around(*inequality_index, inequality_operator);
    } else {
        VERIFY_NOT_REACHED();
    }

    auto modulus_index = lhs.find(modulus_operator);

    if (!modulus_index.has_value()) {
        // Plain operand, e.g. "n".
        VERIFY(lhs.length() == 1);
        parsed.symbol = lhs[0];
    } else {
        // Operand with modulus, e.g. "n % 10".
        auto symbol = lhs.substring_view(0, *modulus_index);
        VERIFY(symbol.length() == 1);

        auto modulus = lhs.substring_view(*modulus_index + modulus_operator.length()).to_uint();
        VERIFY(modulus.has_value());

        parsed.symbol = symbol[0];
        parsed.modulus = move(modulus);
    }

    rhs.for_each_split_view(set_operator, false, [&](auto element) {
        auto range_index = element.find(range_operator);

        if (!range_index.has_value()) {
            auto value = element.to_uint();
            VERIFY(value.has_value());

            parsed.comparators.empend(*value);
            return;
        }

        auto range_begin = element.substring_view(0, *range_index).to_uint();
        VERIFY(range_begin.has_value());

        auto range_end = element.substring_view(*range_index + range_operator.length()).to_uint();
        VERIFY(range_end.has_value());

        parsed.comparators.empend(Array { *range_begin, *range_end });
    });

    return parsed;
}
|
||||
|
||||
// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax
|
||||
//
|
||||
// A very simplified view of a plural rule is:
|
||||
//
|
||||
// condition.* ([@integer|@decimal] sample)+
|
||||
//
|
||||
// The "sample" being series of integer or decimal values that fit the specified condition. The
|
||||
// condition may be one or more binary expressions, chained together with "and" or "or" operators.
|
||||
static void parse_condition(StringView category, StringView rule, Conditions& rules)
|
||||
{
|
||||
static constexpr auto other_category = "other"sv;
|
||||
static constexpr auto disjunction_keyword = " or "sv;
|
||||
static constexpr auto conjunction_keyword = " and "sv;
|
||||
|
||||
// We don't need the examples in the generated code, so we can drop them here.
|
||||
auto example_index = rule.find('@');
|
||||
VERIFY(example_index.has_value());
|
||||
|
||||
auto condition = rule.substring_view(0, *example_index).trim_whitespace();
|
||||
|
||||
// Our implementation does not generate rules for the "other" category. We simply return "other"
|
||||
// for values that do not match any rules. This will need to be revisited if this VERIFY fails.
|
||||
if (condition.is_empty()) {
|
||||
VERIFY(category == other_category);
|
||||
return;
|
||||
}
|
||||
|
||||
auto& relation_list = rules.ensure(category);
|
||||
|
||||
// The grammar for a condition (i.e. a chain of relations) is:
|
||||
//
|
||||
// condition = and_condition ('or' and_condition)*
|
||||
// and_condition = relation ('and' relation)*
|
||||
//
|
||||
// This affords some simplicity in that disjunctions are never embedded within a conjunction.
|
||||
condition.for_each_split_view(disjunction_keyword, false, [&](auto disjunction) {
|
||||
Vector<Relation> conjunctions;
|
||||
|
||||
disjunction.for_each_split_view(conjunction_keyword, false, [&](auto relation) {
|
||||
conjunctions.append(parse_relation(relation));
|
||||
});
|
||||
|
||||
relation_list.relations.append(move(conjunctions));
|
||||
});
|
||||
}
|
||||
|
||||
// Reads `file_name` from the given supplemental directory and records the plural rules it holds.
// Top-level keys of the "supplemental" object that start with "plurals-type-" name the form; the
// rules of locales that are not already present in `locale_data.locales` are ignored.
// Returns an error if the JSON file cannot be read.
static ErrorOr<void> parse_plural_rules(String core_supplemental_path, StringView file_name, UnicodeLocaleData& locale_data)
{
    static constexpr auto form_prefix = "plurals-type-"sv;
    static constexpr auto rule_prefix = "pluralRule-count-"sv;

    auto const rules_path = LexicalPath(move(core_supplemental_path)).append(file_name);

    auto document = TRY(read_json_file(rules_path.string()));
    auto const& supplemental_object = document.as_object().get("supplemental"sv);

    supplemental_object.as_object().for_each_member([&](auto const& form_key, auto const& locales_object) {
        if (!form_key.starts_with(form_prefix))
            return;

        auto form = form_key.substring_view(form_prefix.length());

        locales_object.as_object().for_each_member([&](auto const& locale_name, auto const& locale_rules) {
            auto locale = locale_data.locales.get(locale_name);
            if (!locale.has_value())
                return;

            locale_rules.as_object().for_each_member([&](auto const& rule_key, auto const& rule) {
                VERIFY(rule_key.starts_with(rule_prefix));

                // The remainder of the key after the prefix is the plural category name.
                auto category = rule_key.substring_view(rule_prefix.length());
                parse_condition(category, rule.as_string(), locale->rules_for_form(form));
            });
        });
    });

    return {};
}
|
||||
|
||||
// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Ranges
|
||||
static ErrorOr<void> parse_plural_ranges(String core_supplemental_path, UnicodeLocaleData& locale_data)
|
||||
{
|
||||
static constexpr auto start_segment = "-start-"sv;
|
||||
static constexpr auto end_segment = "-end-"sv;
|
||||
|
||||
LexicalPath plural_ranges_path(move(core_supplemental_path));
|
||||
plural_ranges_path = plural_ranges_path.append("pluralRanges.json"sv);
|
||||
|
||||
auto plural_ranges = TRY(read_json_file(plural_ranges_path.string()));
|
||||
auto const& supplemental_object = plural_ranges.as_object().get("supplemental"sv);
|
||||
auto const& plurals_object = supplemental_object.as_object().get("plurals"sv);
|
||||
|
||||
plurals_object.as_object().for_each_member([&](auto const& loc, auto const& ranges_object) {
|
||||
auto locale = locale_data.locales.get(loc);
|
||||
if (!locale.has_value())
|
||||
return;
|
||||
|
||||
ranges_object.as_object().for_each_member([&](auto const& range, auto const& category) {
|
||||
auto start_index = range.find(start_segment);
|
||||
VERIFY(start_index.has_value());
|
||||
|
||||
auto end_index = range.find(end_segment);
|
||||
VERIFY(end_index.has_value());
|
||||
|
||||
*start_index += start_segment.length();
|
||||
|
||||
auto start = range.substring(*start_index, *end_index - *start_index);
|
||||
auto end = range.substring(*end_index + end_segment.length());
|
||||
|
||||
locale->plural_ranges.empend(move(start), move(end), category.as_string());
|
||||
});
|
||||
});
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// Registers one (initially empty) entry per directory found under `locale_names_path`, keyed by
// the locale name reduced to language[-script][-region], then fills the entries from
// plurals.json, ordinals.json and pluralRanges.json in the "supplemental" directory of `core_path`.
// Propagates any error from directory iteration, locale parsing or JSON reading.
static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, UnicodeLocaleData& locale_data)
{
    auto locale_iterator = TRY(path_to_dir_iterator(move(locale_names_path)));

    auto const supplemental_directory = LexicalPath(move(core_path)).append("supplemental"sv);
    VERIFY(Core::File::is_directory(supplemental_directory.string()));

    // Rebuilds the locale name from its language, script and region subtags only.
    auto locale_without_variants = [&](String path) -> ErrorOr<String> {
        auto language_id = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));

        StringBuilder name;
        name.append(locale_data.unique_strings.get(language_id.language));

        auto script = locale_data.unique_strings.get(language_id.script);
        if (!script.is_empty())
            name.appendff("-{}", script);

        auto region = locale_data.unique_strings.get(language_id.region);
        if (!region.is_empty())
            name.appendff("-{}", region);

        return name.build();
    };

    while (locale_iterator.has_next()) {
        auto locale_path = TRY(next_path_from_dir_iterator(locale_iterator));
        auto locale_name = TRY(locale_without_variants(locale_path));

        locale_data.locales.ensure(locale_name);
    }

    auto const& supplemental_path = supplemental_directory.string();

    TRY(parse_plural_rules(supplemental_path, "plurals.json"sv, locale_data));
    TRY(parse_plural_rules(supplemental_path, "ordinals.json"sv, locale_data));
    TRY(parse_plural_ranges(supplemental_path, locale_data));

    return {};
}
|
||||
|
||||
// Writes the generated header to `file`. The header contains only an include, the include guard
// pragma and an empty Unicode namespace; the locale data argument is unused.
// Returns an error if the write fails.
static ErrorOr<void> generate_unicode_locale_header(Core::Stream::BufferedFile& file, UnicodeLocaleData&)
{
    StringBuilder output;
    SourceGenerator generator { output };

    // Namespace opening and closing are emitted in a single template.
    generator.append(R"~~~(
#include <AK/Types.h>

#pragma once

namespace Unicode {

}
)~~~");

    auto const generated_source = generator.as_string_view();
    TRY(file.write(generated_source.bytes()));

    return {};
}
|
||||
|
||||
// Writes the generated implementation file to `file`. The output consists of:
//  - one function per locale and form (cardinal / ordinal) that maps plural operands to a category,
//  - one function per locale that maps a (start, end) category pair to a range category,
//  - lookup tables of those functions, indexed by locale in sorted-name order,
//  - per-locale lists of available categories,
//  - the public entry points determine_plural_category(), available_plural_categories() and
//    determine_plural_range().
// Locales without data for a table fall back to default_category / default_range.
// Returns an error if the write fails.
static ErrorOr<void> generate_unicode_locale_implementation(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data)
{
    StringBuilder builder;
    SourceGenerator generator { builder };

    // The lookup tables are emitted in sorted locale order.
    auto locales = locale_data.locales.keys();
    quick_sort(locales);

    generator.append(R"~~~(
#include <AK/Array.h>
#include <LibUnicode/Locale.h>
#include <LibUnicode/PluralRules.h>
#include <LibUnicode/UnicodeLocale.h>
#include <LibUnicode/UnicodePluralRules.h>
#include <math.h>

namespace Unicode {

using PluralCategoryFunction = PluralCategory(*)(PluralOperands);
using PluralRangeFunction = PluralCategory(*)(PluralCategory, PluralCategory);

static PluralCategory default_category(PluralOperands)
{
    return PluralCategory::Other;
}

static PluralCategory default_range(PluralCategory, PluralCategory end)
{
    return end;
}

)~~~");

    // Emits the category-deciding function for one locale and form. Nothing is emitted when the
    // locale has no rules for that form.
    auto append_rules = [&](auto form, auto const& locale, auto const& rules) {
        if (rules.is_empty())
            return;

        generator.set("method"sv, Locale::generated_method_name(form, locale));
        HashTable<String> generated_variables;

        generator.append(R"~~~(
static PluralCategory @method@([[maybe_unused]] PluralOperands ops)
{)~~~");

        // Bind by reference: each entry holds a whole Condition, which is expensive to copy.
        for (auto const& [category, condition] : rules) {
            condition.generate_precomputed_variables(generator, generated_variables);

            generator.append(R"~~~(
    if ()~~~");

            generator.set("category"sv, format_identifier({}, category));
            condition.generate_condition(generator);

            generator.append(R"~~~()
        return PluralCategory::@category@;)~~~");
        }

        generator.append(R"~~~(
    return PluralCategory::Other;
}
)~~~");
    };

    // Emits the range-deciding function for one locale. Nothing is emitted when the locale has no
    // plural ranges.
    auto append_ranges = [&](auto const& locale, auto const& ranges) {
        if (ranges.is_empty())
            return;

        generator.set("method"sv, Locale::generated_method_name("range"sv, locale));

        generator.append(R"~~~(
static PluralCategory @method@(PluralCategory start, PluralCategory end)
{)~~~");

        for (auto const& range : ranges) {
            generator.set("start"sv, format_identifier({}, range.start));
            generator.set("end"sv, format_identifier({}, range.end));
            generator.set("category"sv, format_identifier({}, range.category));

            generator.append(R"~~~(
    if (start == PluralCategory::@start@ && end == PluralCategory::@end@)
        return PluralCategory::@category@;)~~~");
        }

        generator.append(R"~~~(
    return end;
}
)~~~");
    };

    // Emits an array of function pointers with one slot per locale. `data_for_locale` selects the
    // data whose emptiness decides between the locale's generated function and `default_`.
    auto append_lookup_table = [&](auto type, auto form, auto default_, auto data_for_locale) {
        generator.set("type"sv, type);
        generator.set("form"sv, form);
        generator.set("default"sv, default_);
        generator.set("size"sv, String::number(locales.size()));

        generator.append(R"~~~(
static constexpr Array<@type@, @size@> s_@form@_functions { {)~~~");

        for (auto const& locale : locales) {
            auto& rules = data_for_locale(locale_data.locales.find(locale)->value, form);

            if (rules.is_empty()) {
                generator.append(R"~~~(
    @default@,)~~~");
            } else {
                generator.set("method"sv, Locale::generated_method_name(form, locale));
                generator.append(R"~~~(
    @method@,)~~~");
            }
        }

        generator.append(R"~~~(
} };
)~~~");
    };

    // Emits the list of categories available for one locale: "Other" followed by every category
    // that has a rule.
    auto append_categories = [&](auto const& name, auto const& rules) {
        generator.set("name"sv, name);
        generator.set("size"sv, String::number(rules.size() + 1));

        generator.append(R"~~~(
static constexpr Array<PluralCategory, @size@> @name@ { { PluralCategory::Other)~~~");

        // Bind by reference; only the key is needed, so copying the Condition would be wasted work.
        for (auto const& [category, condition] : rules) {
            generator.set("category"sv, format_identifier({}, category));
            generator.append(", PluralCategory::@category@"sv);
        }

        generator.append("} };"sv);
    };

    // Bind by reference: copying an entry would duplicate all of the locale's rules and ranges.
    for (auto const& [locale, rules] : locale_data.locales) {
        append_rules("cardinal"sv, locale, rules.cardinal_rules);
        append_rules("ordinal"sv, locale, rules.ordinal_rules);
        append_ranges(locale, rules.plural_ranges);
    }

    append_lookup_table("PluralCategoryFunction"sv, "cardinal"sv, "default_category"sv, [](auto& rules, auto form) -> Conditions& { return rules.rules_for_form(form); });
    append_lookup_table("PluralCategoryFunction"sv, "ordinal"sv, "default_category"sv, [](auto& rules, auto form) -> Conditions& { return rules.rules_for_form(form); });
    append_lookup_table("PluralRangeFunction"sv, "range"sv, "default_range"sv, [](auto& rules, auto) -> Ranges& { return rules.plural_ranges; });

    generate_mapping(generator, locales, "PluralCategory"sv, "s_cardinal_categories"sv, "s_cardinal_categories_{}"sv, format_identifier,
        [&](auto const& name, auto const& locale) {
            auto& rules = locale_data.locales.find(locale)->value;
            append_categories(name, rules.rules_for_form("cardinal"sv));
        });

    generate_mapping(generator, locales, "PluralCategory"sv, "s_ordinal_categories"sv, "s_ordinal_categories_{}"sv, format_identifier,
        [&](auto const& name, auto const& locale) {
            auto& rules = locale_data.locales.find(locale)->value;
            append_categories(name, rules.rules_for_form("ordinal"sv));
        });

    generator.append(R"~~~(
PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands)
{
    auto locale_value = locale_from_string(locale);
    if (!locale_value.has_value())
        return PluralCategory::Other;

    auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
    PluralCategoryFunction decider { nullptr };

    switch (form) {
    case PluralForm::Cardinal:
        decider = s_cardinal_functions[locale_index];
        break;
    case PluralForm::Ordinal:
        decider = s_ordinal_functions[locale_index];
        break;
    }

    return decider(move(operands));
}

Span<PluralCategory const> available_plural_categories(StringView locale, PluralForm form)
{
    auto locale_value = locale_from_string(locale);
    if (!locale_value.has_value())
        return {};

    auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.

    switch (form) {
    case PluralForm::Cardinal:
        return s_cardinal_categories[locale_index];
    case PluralForm::Ordinal:
        return s_ordinal_categories[locale_index];
    }

    VERIFY_NOT_REACHED();
}

PluralCategory determine_plural_range(StringView locale, PluralCategory start, PluralCategory end)
{
    auto locale_value = locale_from_string(locale);
    if (!locale_value.has_value())
        return PluralCategory::Other;

    auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.

    PluralRangeFunction decider = s_range_functions[locale_index];
    return decider(start, end);
}

}
)~~~");

    TRY(file.write(generator.as_string_view().bytes()));
    return {};
}
|
||||
|
||||
// Entry point of the plural rules generator: parses the command line, opens both output files,
// loads the CLDR plural data and writes the generated header and implementation.
// Returns 0 on success; errors from opening, parsing or writing are propagated.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView header_path;
    StringView implementation_path;
    StringView cldr_core_path;
    StringView cldr_locale_names_path;

    Core::ArgsParser args_parser;
    args_parser.add_option(header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
    args_parser.add_option(implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    args_parser.add_option(cldr_core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
    args_parser.add_option(cldr_locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path");
    args_parser.parse(arguments);

    // Both output files are opened before any CLDR data is read.
    auto header_file = TRY(open_file(header_path, Core::Stream::OpenMode::Write));
    auto implementation_file = TRY(open_file(implementation_path, Core::Stream::OpenMode::Write));

    UnicodeLocaleData locale_data;
    TRY(parse_all_locales(cldr_core_path, cldr_locale_names_path, locale_data));

    TRY(generate_unicode_locale_header(*header_file, locale_data));
    TRY(generate_unicode_locale_implementation(*implementation_file, locale_data));

    return 0;
}
|
|
@ -1,312 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "GeneratorUtil.h"
|
||||
#include <AK/Format.h>
|
||||
#include <AK/HashMap.h>
|
||||
#include <AK/JsonObject.h>
|
||||
#include <AK/JsonParser.h>
|
||||
#include <AK/JsonValue.h>
|
||||
#include <AK/LexicalPath.h>
|
||||
#include <AK/SourceGenerator.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibCore/ArgsParser.h>
|
||||
#include <LibCore/DirIterator.h>
|
||||
#include <LibCore/Stream.h>
|
||||
#include <LibUnicode/Locale.h>
|
||||
#include <LibUnicode/RelativeTimeFormat.h>
|
||||
|
||||
using StringIndexType = u16;
|
||||
constexpr auto s_string_index_type = "u16"sv;
|
||||
|
||||
using RelativeTimeFormatIndexType = u16;
|
||||
constexpr auto s_relative_time_format_index_type = "u16"sv;
|
||||
|
||||
struct RelativeTimeFormat {
|
||||
unsigned hash() const
|
||||
{
|
||||
auto hash = time_unit.hash();
|
||||
hash = pair_int_hash(hash, style.hash());
|
||||
hash = pair_int_hash(hash, plurality.hash());
|
||||
hash = pair_int_hash(hash, tense_or_number);
|
||||
hash = pair_int_hash(hash, pattern);
|
||||
return hash;
|
||||
}
|
||||
|
||||
bool operator==(RelativeTimeFormat const& other) const
|
||||
{
|
||||
return (time_unit == other.time_unit)
|
||||
&& (plurality == other.plurality)
|
||||
&& (style == other.style)
|
||||
&& (tense_or_number == other.tense_or_number)
|
||||
&& (pattern == other.pattern);
|
||||
}
|
||||
|
||||
String time_unit;
|
||||
String style;
|
||||
String plurality;
|
||||
StringIndexType tense_or_number { 0 };
|
||||
StringIndexType pattern { 0 };
|
||||
};
|
||||
|
||||
template<>
|
||||
struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> {
|
||||
ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format)
|
||||
{
|
||||
return Formatter<FormatString>::format(builder,
|
||||
"{{ TimeUnit::{}, Style::{}, PluralCategory::{}, {}, {} }}"sv,
|
||||
format.time_unit,
|
||||
format.style,
|
||||
format.plurality,
|
||||
format.tense_or_number,
|
||||
format.pattern);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> {
|
||||
static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); }
|
||||
};
|
||||
|
||||
struct Locale {
|
||||
Vector<RelativeTimeFormatIndexType> time_units;
|
||||
};
|
||||
|
||||
struct UnicodeLocaleData {
|
||||
UniqueStringStorage<StringIndexType> unique_strings;
|
||||
UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats;
|
||||
|
||||
HashMap<String, Locale> locales;
|
||||
};
|
||||
|
||||
// Reads dateFields.json from `locale_dates_path` and appends every relative time pattern of a
// sanctioned time unit to `locale.time_units`. Strings and formats are deduplicated through
// `locale_data`. Returns an error if the JSON file cannot be read.
static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale)
{
    auto const date_fields_path = LexicalPath(move(locale_dates_path)).append("dateFields.json"sv);

    auto document = TRY(read_json_file(date_fields_path.string()));
    auto const& main_object = document.as_object().get("main"sv);
    // The locale object is keyed by the name of the directory that contains dateFields.json.
    auto const& locale_object = main_object.as_object().get(date_fields_path.parent().basename());
    auto const& dates_object = locale_object.as_object().get("dates"sv);
    auto const& fields_object = dates_object.as_object().get("fields"sv);

    auto is_sanctioned_unit = [](auto unit) {
        // This is a copy of the time units sanctioned for use within ECMA-402.
        // https://tc39.es/ecma402/#sec-singularrelativetimeunit
        return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
    };

    // Builds one RelativeTimeFormat and records its index for this locale.
    // The tense/number string is interned before the pattern string; keep that order.
    auto record_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
        RelativeTimeFormat entry {};
        entry.time_unit = unit.to_titlecase_string();
        entry.style = style.to_titlecase_string();
        entry.plurality = plurality.to_titlecase_string();
        entry.tense_or_number = locale_data.unique_strings.ensure(tense_or_number);
        entry.pattern = locale_data.unique_strings.ensure(pattern.as_string());

        auto const entry_index = locale_data.unique_formats.ensure(move(entry));
        locale.time_units.append(entry_index);
    };

    fields_object.as_object().for_each_member([&](auto const& field_name, auto const& field_patterns) {
        // Field names look like "<unit>" or "<unit>-<style>"; a missing style means "long".
        auto name_parts = field_name.split_view('-');
        auto unit = name_parts[0];

        if (!is_sanctioned_unit(unit))
            return;

        auto style = (name_parts.size() > 1) ? name_parts[1] : "long"sv;

        field_patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) {
            constexpr auto number_key = "relative-type-"sv;
            constexpr auto tense_key = "relativeTime-type-"sv;
            constexpr auto plurality_key = "relativeTimePattern-count-"sv;

            if (type.starts_with(number_key)) {
                auto number = type.substring_view(number_key.length());
                record_pattern(unit, style, "Other"sv, number, pattern_value);
                return;
            }

            if (!type.starts_with(tense_key))
                return;

            auto tense = type.substring_view(tense_key.length());

            pattern_value.as_object().for_each_member([&](auto const& count_key, auto const& pattern) {
                VERIFY(count_key.starts_with(plurality_key));
                auto plurality = count_key.substring_view(plurality_key.length());

                record_pattern(unit, style, plurality, tense, pattern);
            });
        });
    });

    return {};
}
|
||||
|
||||
// Visits every path produced by the directory iterator opened on `dates_path` and feeds each one
// to parse_date_fields(), accumulating the results in `locale_data`.
//
// The key used in `locale_data.locales` is rebuilt from the parsed language, script and region
// subtags of the path's basename only; any other subtag (e.g. a variant) is left out of the key.
// The first failing step aborts the walk and propagates its error.
static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
{
    auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));

    // Produces "language[-script][-region]" from the last component of `path`.
    auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
        auto language_id = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));

        StringBuilder locale_name;
        locale_name.append(locale_data.unique_strings.get(language_id.language));

        // Script and region are only emitted (dash-prefixed) when the parsed ID has them.
        auto append_subtag_if_present = [&](auto subtag_index) {
            auto subtag = locale_data.unique_strings.get(subtag_index);
            if (subtag.is_empty())
                return;

            locale_name.appendff("-{}", subtag);
        };

        append_subtag_if_present(language_id.script);
        append_subtag_if_present(language_id.region);

        return locale_name.build();
    };

    while (dates_iterator.has_next()) {
        auto locale_path = TRY(next_path_from_dir_iterator(dates_iterator));
        auto locale_name = TRY(remove_variants_from_path(locale_path));

        // ensure() hands back the entry for this locale name; the parsed formats are stored on it.
        auto& locale_entry = locale_data.locales.ensure(locale_name);
        TRY(parse_date_fields(move(locale_path), locale_data, locale_entry));
    }

    return {};
}
|
||||
|
||||
// Writes the generated header to `file`. The header currently consists of nothing but
// `#pragma once`, the LibUnicode forward-declaration include and an empty `Unicode` namespace;
// the locale data argument is accepted but not consulted.
static ErrorOr<void> generate_unicode_locale_header(Core::Stream::BufferedFile& file, UnicodeLocaleData&)
{
    StringBuilder header_text;
    SourceGenerator generator { header_text };

    // Prologue: include guard, include, and the opening of the namespace.
    generator.append(R"~~~(
#pragma once

#include <LibUnicode/Forward.h>

namespace Unicode {
)~~~");

    // Epilogue: close the namespace.
    generator.append(R"~~~(
}
)~~~");

    auto const generated_source = generator.as_string_view();
    TRY(file.write(generated_source.bytes()));

    return {};
}
|
||||
|
||||
// Writes the generated implementation file to `file`. In order, the output contains:
//   1. the include list and the opening of the `Unicode` namespace,
//   2. whatever `locale_data.unique_strings.generate()` emits,
//   3. the `RelativeTimeFormatImpl` struct definition,
//   4. whatever `locale_data.unique_formats.generate()` emits for `s_relative_time_formats`,
//   5. one index array per locale plus the `s_locale_relative_time_formats` mapping (via generate_mapping()),
//   6. the `get_relative_time_format_patterns()` lookup function and the closing of the namespace.
// Any write failure is propagated to the caller.
static ErrorOr<void> generate_unicode_locale_implementation(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data)
{
    StringBuilder implementation_text;
    SourceGenerator generator { implementation_text };

    // Placeholders substituted into the templates below.
    generator.set("string_index_type"sv, s_string_index_type);
    generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type);

    generator.append(R"~~~(
#include <AK/Array.h>
#include <AK/StringView.h>
#include <AK/Vector.h>
#include <LibUnicode/Locale.h>
#include <LibUnicode/PluralRules.h>
#include <LibUnicode/RelativeTimeFormat.h>
#include <LibUnicode/UnicodeRelativeTimeFormat.h>

namespace Unicode {
)~~~");

    locale_data.unique_strings.generate(generator);

    // Storage form of a format: strings are kept as indices and decoded on conversion.
    generator.append(R"~~~(
struct RelativeTimeFormatImpl {
    RelativeTimeFormat to_relative_time_format() const
    {
        RelativeTimeFormat relative_time_format {};
        relative_time_format.plurality = plurality;
        relative_time_format.pattern = decode_string(pattern);

        return relative_time_format;
    }

    TimeUnit time_unit;
    Style style;
    PluralCategory plurality;
    @string_index_type@ tense_or_number { 0 };
    @string_index_type@ pattern { 0 };
};
)~~~");

    locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10);

    // Emits `static constexpr Array<index_type, N> <name> { { a, b, c } };` for one list of format indices.
    auto append_list = [&](String name, auto const& list) {
        generator.set("name", name);
        generator.set("size", String::number(list.size()));

        generator.append(R"~~~(
static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");

        // The first element is preceded by a single space, every later one by a comma and a space.
        auto separator = " "sv;
        for (auto format_index : list) {
            generator.append(separator);
            generator.append(String::number(format_index));
            separator = ", "sv;
        }

        generator.append(" } };");
    };

    // NOTE(review): the per-locale name format says "number_systems_digits" although the lists hold
    // relative time format indices; this looks like a leftover from another generator. Left as-is here
    // because it determines the identifiers in the generated file - confirm before renaming.
    generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}"sv, nullptr, [&](auto const& list_name, auto const& locale) { append_list(list_name, locale.time_units); });

    generator.append(R"~~~(
Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
{
    Vector<RelativeTimeFormat> formats;

    auto locale_value = locale_from_string(locale);
    if (!locale_value.has_value())
        return formats;

    auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
    auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);

    for (auto const& locale_format_index : locale_formats) {
        auto const& locale_format = s_relative_time_formats.at(locale_format_index);

        if (locale_format.time_unit != time_unit)
            continue;
        if (locale_format.style != style)
            continue;
        if (decode_string(locale_format.tense_or_number) != tense_or_number)
            continue;

        formats.append(locale_format.to_relative_time_format());
    }

    return formats;
}

}
)~~~");

    auto const generated_source = generator.as_string_view();
    TRY(file.write(generated_source.bytes()));

    return {};
}
|
||||
|
||||
// Entry point of the generator.
//
// Command line options:
//   -h / --generated-header-path          output path of the generated header
//   -c / --generated-implementation-path  output path of the generated implementation
//   -d / --dates-path                     path to the cldr-dates directory
//
// Both output files are opened for writing before any CLDR data is parsed; the data is then
// parsed and the header and implementation are generated, in that order. Returns 0 on success;
// the first failing step's error is propagated.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView header_output_path;
    StringView implementation_output_path;
    StringView cldr_dates_path;

    Core::ArgsParser parser;
    parser.add_option(header_output_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
    parser.add_option(implementation_output_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    parser.add_option(cldr_dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
    parser.parse(arguments);

    // Open the outputs first so that an unusable output path is reported before any parsing happens.
    auto header_file = TRY(open_file(header_output_path, Core::Stream::OpenMode::Write));
    auto implementation_file = TRY(open_file(implementation_output_path, Core::Stream::OpenMode::Write));

    UnicodeLocaleData parsed_data;
    TRY(parse_all_locales(cldr_dates_path, parsed_data));

    TRY(generate_unicode_locale_header(*header_file, parsed_data));
    TRY(generate_unicode_locale_implementation(*implementation_file, parsed_data));

    return 0;
}
|
Loading…
Add table
Add a link
Reference in a new issue