From ea78bac36db527abe0e80dce17dc03372e4f7065 Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Thu, 7 Jul 2022 09:44:17 -0400
Subject: [PATCH] LibUnicode: Parse and generate per-locale plural rules from the CLDR

Plural rules in the CLDR are of the form:

    "cs": {
        "pluralRule-count-one": "i = 1 and v = 0 @integer 1",
        "pluralRule-count-few": "i = 2..4 and v = 0 @integer 2~4",
        "pluralRule-count-many": "v != 0 @decimal 0.0~1.5, 10.0, 100.0 ...",
        "pluralRule-count-other": "@integer 0, 5~19, 100, 1000, 10000 ..."
    }

The syntax is described here:
https://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax

There are up to 2 sets of rules for each locale, a cardinal set and an
ordinal set. The approach here is to generate a C++ function for each
set of rules. Each condition in the rules (e.g. "i = 1 and v = 0") is
transpiled to a C++ if-statement within its function. Then lookup tables
are generated to match locales to their generated functions.

NOTE: -Wno-parentheses-equality is added to the LibUnicodeData compile
flags because the generated plural rules have lots of extra parentheses
(because e.g. we need to selectively negate and combine rules). The code
to generate only exactly the right number of parentheses is quite hairy,
so this just tells the compiler to ignore the extras.
--- Meta/CMake/unicode_data.cmake | 17 + Meta/Lagom/CMakeLists.txt | 1 + .../CodeGenerators/LibUnicode/CMakeLists.txt | 1 + .../LibUnicode/GenerateUnicodePluralRules.cpp | 569 ++++++++++++++++++ Userland/Libraries/LibUnicode/CMakeLists.txt | 3 +- Userland/Libraries/LibUnicode/Forward.h | 2 + Userland/Libraries/LibUnicode/PluralRules.cpp | 59 ++ Userland/Libraries/LibUnicode/PluralRules.h | 65 ++ 8 files changed, 716 insertions(+), 1 deletion(-) create mode 100644 Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodePluralRules.cpp create mode 100644 Userland/Libraries/LibUnicode/PluralRules.cpp create mode 100644 Userland/Libraries/LibUnicode/PluralRules.h diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake index b0c66d4edc..0adbed2d50 100644 --- a/Meta/CMake/unicode_data.cmake +++ b/Meta/CMake/unicode_data.cmake @@ -141,6 +141,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) set(UNICODE_NUMBER_FORMAT_HEADER LibUnicode/UnicodeNumberFormat.h) set(UNICODE_NUMBER_FORMAT_IMPLEMENTATION LibUnicode/UnicodeNumberFormat.cpp) + set(UNICODE_PLURAL_RULES_HEADER LibUnicode/UnicodePluralRules.h) + set(UNICODE_PLURAL_RULES_IMPLEMENTATION LibUnicode/UnicodePluralRules.cpp) + set(UNICODE_RELATIVE_TIME_FORMAT_HEADER LibUnicode/UnicodeRelativeTimeFormat.h) set(UNICODE_RELATIVE_TIME_FORMAT_IMPLEMENTATION LibUnicode/UnicodeRelativeTimeFormat.cpp) @@ -159,6 +162,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) set(UNICODE_NUMBER_FORMAT_HEADER UnicodeNumberFormat.h) set(UNICODE_NUMBER_FORMAT_IMPLEMENTATION UnicodeNumberFormat.cpp) + set(UNICODE_PLURAL_RULES_HEADER UnicodePluralRules.h) + set(UNICODE_PLURAL_RULES_IMPLEMENTATION UnicodePluralRules.cpp) + set(UNICODE_RELATIVE_TIME_FORMAT_HEADER UnicodeRelativeTimeFormat.h) set(UNICODE_RELATIVE_TIME_FORMAT_IMPLEMENTATION UnicodeRelativeTimeFormat.cpp) @@ -201,6 +207,15 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${UNICODE_NUMBER_FORMAT_IMPLEMENTATION}" arguments -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}" -u 
"${CLDR_UNITS_PATH}" ) + invoke_generator( + "UnicodePluralRules" + Lagom::GenerateUnicodePluralRules + "${CLDR_VERSION_FILE}" + "${UNICODE_META_TARGET_PREFIX}" + "${UNICODE_PLURAL_RULES_HEADER}" + "${UNICODE_PLURAL_RULES_IMPLEMENTATION}" + arguments -r "${CLDR_CORE_PATH}" -l "${CLDR_LOCALES_PATH}" + ) invoke_generator( "UnicodeRelativeTimeFormat" Lagom::GenerateUnicodeRelativeTimeFormat @@ -220,6 +235,8 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) ${UNICODE_LOCALE_IMPLEMENTATION} ${UNICODE_NUMBER_FORMAT_HEADER} ${UNICODE_NUMBER_FORMAT_IMPLEMENTATION} + ${UNICODE_PLURAL_RULES_HEADER} + ${UNICODE_PLURAL_RULES_IMPLEMENTATION} ${UNICODE_RELATIVE_TIME_FORMAT_HEADER} ${UNICODE_RELATIVE_TIME_FORMAT_IMPLEMENTATION} ) diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index bc47898f2a..228138acb3 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -503,6 +503,7 @@ if (BUILD_LAGOM) SOURCES ${LIBUNICODE_SOURCES} ${UNICODE_DATA_SOURCES} ) target_compile_definitions(LibUnicode PRIVATE ENABLE_UNICODE_DATA=$) + target_compile_options(LibUnicode PRIVATE -Wno-parentheses-equality) target_link_libraries(LibUnicode LibTimeZone) # WASM diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/CMakeLists.txt b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/CMakeLists.txt index 4fa8a22981..a343858d11 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/CMakeLists.txt +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/CMakeLists.txt @@ -2,4 +2,5 @@ lagom_tool(GenerateUnicodeData SOURCES GenerateUnicodeData.cpp LIBS LibMain) lagom_tool(GenerateUnicodeDateTimeFormat SOURCES GenerateUnicodeDateTimeFormat.cpp LIBS LibMain LibTimeZone) lagom_tool(GenerateUnicodeLocale SOURCES GenerateUnicodeLocale.cpp LIBS LibMain) lagom_tool(GenerateUnicodeNumberFormat SOURCES GenerateUnicodeNumberFormat.cpp LIBS LibMain) +lagom_tool(GenerateUnicodePluralRules SOURCES GenerateUnicodePluralRules.cpp LIBS LibMain) lagom_tool(GenerateUnicodeRelativeTimeFormat SOURCES 
GenerateUnicodeRelativeTimeFormat.cpp LIBS LibMain) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodePluralRules.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodePluralRules.cpp new file mode 100644 index 0000000000..3b8fdc818d --- /dev/null +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodePluralRules.cpp @@ -0,0 +1,569 @@ +/* + * Copyright (c) 2022, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "GeneratorUtil.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using StringIndexType = u16; + +static String format_identifier(StringView owner, String identifier) +{ + identifier = identifier.replace("-"sv, "_"sv, ReplaceMode::All); + + if (all_of(identifier, is_ascii_digit)) + return String::formatted("{}_{}", owner[0], identifier); + if (is_ascii_lower_alpha(identifier[0])) + return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1)); + return identifier; +} + +struct Relation { + using Range = Array; + using Comparator = Variant; + + enum class Type { + Equality, + Inequality, + }; + + String const& modulus_variable_name() const + { + VERIFY(modulus.has_value()); + + if (!cached_modulus_variable_name.has_value()) + cached_modulus_variable_name = String::formatted("mod_{}_{}", symbol, *modulus); + + return *cached_modulus_variable_name; + } + + String const& exponential_variable_name() const + { + if (!cached_exponential_variable_name.has_value()) + cached_exponential_variable_name = String::formatted("exp_{}", symbol); + + return *cached_exponential_variable_name; + } + + void generate_relation(SourceGenerator& generator) const + { + auto append_variable_name = [&]() { + if (modulus.has_value()) + generator.append(modulus_variable_name()); + else if (symbol == 'e' || symbol == 'c') + generator.append(exponential_variable_name()); + else + 
generator.append(String::formatted("ops.{}", Unicode::PluralOperands::symbol_to_variable_name(symbol))); + }; + + auto append_value = [&](u32 value) { + append_variable_name(); + generator.append(" == "sv); + generator.append(String::number(value)); + }; + + auto append_range = [&](auto const& range) { + // This check avoids generating "0 <= unsigned_value", which is always true. + if (range[0] != 0 || Unicode::PluralOperands::symbol_requires_floating_point_modulus(symbol)) { + generator.append(String::formatted("{} <= ", range[0])); + append_variable_name(); + generator.append(" && "sv); + } + + append_variable_name(); + generator.append(String::formatted(" <= {}", range[1])); + }; + + if (type == Type::Inequality) + generator.append("!"sv); + + generator.append("("sv); + + bool first = true; + for (auto const& comparator : comparators) { + generator.append(first ? "("sv : " || ("sv); + + comparator.visit( + [&](u32 value) { append_value(value); }, + [&](Range const& range) { append_range(range); }); + + generator.append(")"sv); + first = false; + } + + generator.append(")"sv); + } + + void generate_precomputed_variables(SourceGenerator& generator, HashTable& generated_variables) const + { + // FIXME: How do we handle the exponential symbols? They seem unused by ECMA-402. 
+ if (symbol == 'e' || symbol == 'c') { + if (auto variable = exponential_variable_name(); !generated_variables.contains(variable)) { + generated_variables.set(variable); + generator.set("variable"sv, move(variable)); + generator.append(R"~~~( + auto @variable@ = 0;)~~~"); + } + } + + if (!modulus.has_value()) + return; + + auto variable = modulus_variable_name(); + if (generated_variables.contains(variable)) + return; + + generated_variables.set(variable); + generator.set("variable"sv, move(variable)); + generator.set("operand"sv, Unicode::PluralOperands::symbol_to_variable_name(symbol)); + generator.set("modulus"sv, String::number(*modulus)); + + if (Unicode::PluralOperands::symbol_requires_floating_point_modulus(symbol)) { + generator.append(R"~~~( + auto @variable@ = fmod(ops.@operand@, @modulus@);)~~~"); + } else { + generator.append(R"~~~( + auto @variable@ = ops.@operand@ % @modulus@;)~~~"); + } + } + + Type type; + char symbol { 0 }; + Optional modulus; + Vector comparators; + +private: + mutable Optional cached_modulus_variable_name; + mutable Optional cached_exponential_variable_name; +}; + +struct Condition { + void generate_condition(SourceGenerator& generator) const + { + for (size_t i = 0; i < relations.size(); ++i) { + if (i > 0) + generator.append(" || "sv); + + auto const& conjunctions = relations[i]; + if (conjunctions.size() > 1) + generator.append("("sv); + + for (size_t j = 0; j < conjunctions.size(); ++j) { + if (j > 0) + generator.append(" && "sv); + conjunctions[j].generate_relation(generator); + } + + if (conjunctions.size() > 1) + generator.append(")"sv); + } + } + + void generate_precomputed_variables(SourceGenerator& generator, HashTable& generated_variables) const + { + for (auto const& conjunctions : relations) { + for (auto const& relation : conjunctions) + relation.generate_precomputed_variables(generator, generated_variables); + } + } + + Vector> relations; +}; + +struct Locale { + static String generated_method_name(StringView 
form, StringView locale) + { + return String::formatted("{}_plurality_{}", form, format_identifier({}, locale)); + } + + HashMap& rules_for_form(StringView form) + { + if (form == "cardinal") + return cardinal_rules; + if (form == "ordinal") + return ordinal_rules; + VERIFY_NOT_REACHED(); + } + + HashMap cardinal_rules; + HashMap ordinal_rules; +}; + +struct UnicodeLocaleData { + UniqueStringStorage unique_strings; + + HashMap locales; + Vector categories; +}; + +static Relation parse_relation(StringView relation) +{ + static constexpr auto equality_operator = " = "sv; + static constexpr auto inequality_operator = " != "sv; + static constexpr auto modulus_operator = " % "sv; + static constexpr auto range_operator = ".."sv; + static constexpr auto set_operator = ','; + + Relation parsed; + + StringView lhs; + StringView rhs; + + if (auto index = relation.find(equality_operator); index.has_value()) { + parsed.type = Relation::Type::Equality; + lhs = relation.substring_view(0, *index); + rhs = relation.substring_view(*index + equality_operator.length()); + } else if (auto index = relation.find(inequality_operator); index.has_value()) { + parsed.type = Relation::Type::Inequality; + lhs = relation.substring_view(0, *index); + rhs = relation.substring_view(*index + inequality_operator.length()); + } else { + VERIFY_NOT_REACHED(); + } + + if (auto index = lhs.find(modulus_operator); index.has_value()) { + auto symbol = lhs.substring_view(0, *index); + VERIFY(symbol.length() == 1); + + auto modulus = lhs.substring_view(*index + modulus_operator.length()).to_uint(); + VERIFY(modulus.has_value()); + + parsed.symbol = symbol[0]; + parsed.modulus = move(modulus); + } else { + VERIFY(lhs.length() == 1); + parsed.symbol = lhs[0]; + } + + rhs.for_each_split_view(set_operator, false, [&](auto set) { + if (auto index = set.find(range_operator); index.has_value()) { + auto range_begin = set.substring_view(0, *index).to_uint(); + VERIFY(range_begin.has_value()); + + auto range_end = 
set.substring_view(*index + range_operator.length()).to_uint(); + VERIFY(range_end.has_value()); + + parsed.comparators.empend(Array { *range_begin, *range_end }); + } else { + auto value = set.to_uint(); + VERIFY(value.has_value()); + + parsed.comparators.empend(*value); + } + }); + + return parsed; +} + +// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax +// +// A very simplified view of a plural rule is: +// +// condition.* ([@integer|@decimal] sample)+ +// +// The "sample" being series of integer or decimal values that fit the specified condition. The +// condition may be one or more binary expressions, chained together with "and" or "or" operators. +static void parse_condition(StringView category, StringView rule, HashMap& rules) +{ + static constexpr auto other_category = "other"sv; + static constexpr auto disjunction_keyword = " or "sv; + static constexpr auto conjunction_keyword = " and "sv; + + // We don't need the examples in the generated code, so we can drop them here. + auto example_index = rule.find('@'); + VERIFY(example_index.has_value()); + + auto condition = rule.substring_view(0, *example_index).trim_whitespace(); + + // Our implementation does not generate rules for the "other" category. We simply return "other" + // for values that do not match any rules. This will need to be revisited if this VERIFY fails. + if (condition.is_empty()) { + VERIFY(category == other_category); + return; + } + + auto& relation_list = rules.ensure(category); + + // The grammar for a condition (i.e. a chain of relations) is: + // + // condition = and_condition ('or' and_condition)* + // and_condition = relation ('and' relation)* + // + // This affords some simplicity in that disjunctions are never embedded within a conjunction. 
+ condition.for_each_split_view(disjunction_keyword, false, [&](auto disjunction) { + Vector conjunctions; + + disjunction.for_each_split_view(conjunction_keyword, false, [&](auto relation) { + conjunctions.append(parse_relation(relation)); + }); + + relation_list.relations.append(move(conjunctions)); + }); +} + +static ErrorOr parse_plural_rules(String core_supplemental_path, StringView file_name, UnicodeLocaleData& locale_data) +{ + static constexpr auto form_prefix = "plurals-type-"sv; + static constexpr auto rule_prefix = "pluralRule-count-"sv; + + LexicalPath plurals_path(move(core_supplemental_path)); + plurals_path = plurals_path.append(file_name); + + auto plurals = TRY(read_json_file(plurals_path.string())); + auto const& supplemental_object = plurals.as_object().get("supplemental"sv); + + supplemental_object.as_object().for_each_member([&](auto const& key, auto const& plurals_object) { + if (!key.starts_with(form_prefix)) + return; + + auto form = key.substring_view(form_prefix.length()); + + plurals_object.as_object().for_each_member([&](auto const& loc, auto const& rules) { + auto locale = locale_data.locales.get(loc); + if (!locale.has_value()) + return; + + rules.as_object().for_each_member([&](auto const& key, auto const& condition) { + VERIFY(key.starts_with(rule_prefix)); + + auto category = key.substring_view(rule_prefix.length()); + parse_condition(category, condition.as_string(), locale->rules_for_form(form)); + + if (!locale_data.categories.contains_slow(category)) + locale_data.categories.append(category); + }); + }); + }); + + return {}; +} + +static ErrorOr parse_all_locales(String core_path, String locale_names_path, UnicodeLocaleData& locale_data) +{ + auto identity_iterator = TRY(path_to_dir_iterator(move(locale_names_path))); + + LexicalPath core_supplemental_path(move(core_path)); + core_supplemental_path = core_supplemental_path.append("supplemental"sv); + VERIFY(Core::File::is_directory(core_supplemental_path.string())); + + auto 
remove_variants_from_path = [&](String path) -> ErrorOr { + auto parsed_locale = TRY(CanonicalLanguageID::parse(locale_data.unique_strings, LexicalPath::basename(path))); + + StringBuilder builder; + builder.append(locale_data.unique_strings.get(parsed_locale.language)); + if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty()) + builder.appendff("-{}", script); + if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty()) + builder.appendff("-{}", region); + + return builder.build(); + }; + + while (identity_iterator.has_next()) { + auto locale_path = TRY(next_path_from_dir_iterator(identity_iterator)); + auto language = TRY(remove_variants_from_path(locale_path)); + + locale_data.locales.ensure(language); + } + + TRY(parse_plural_rules(core_supplemental_path.string(), "plurals.json"sv, locale_data)); + TRY(parse_plural_rules(core_supplemental_path.string(), "ordinals.json"sv, locale_data)); + return {}; +} + +static ErrorOr generate_unicode_locale_header(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data) +{ + StringBuilder builder; + SourceGenerator generator { builder }; + + generator.append(R"~~~( +#include + +#pragma once + +namespace Unicode { +)~~~"); + + generate_enum(generator, format_identifier, "PluralCategory"sv, {}, locale_data.categories); + + generator.append(R"~~~( +} +)~~~"); + + TRY(file.write(generator.as_string_view().bytes())); + return {}; +} + +static ErrorOr generate_unicode_locale_implementation(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data) +{ + StringBuilder builder; + SourceGenerator generator { builder }; + + auto locales = locale_data.locales.keys(); + quick_sort(locales); + + generator.append(R"~~~( +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Unicode { + +using PluralCategoryFunction = PluralCategory(*)(PluralOperands); + +static PluralCategory default_category(PluralOperands) +{ + 
return PluralCategory::Other; +} + +)~~~"); + + auto append_string_conversions = [&](StringView enum_title, StringView enum_snake, auto const& values) { + HashValueMap hashes; + hashes.ensure_capacity(values.size()); + + for (auto const& value : values) + hashes.set(value.hash(), format_identifier(enum_title, value)); + + generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes)); + generate_value_to_string(generator, "{}_to_string"sv, enum_title, enum_snake, format_identifier, values); + }; + + auto append_rules = [&](auto form, auto const& locale, auto const& rules) { + if (rules.is_empty()) + return; + + generator.set("method"sv, Locale::generated_method_name(form, locale)); + HashTable generated_variables; + + generator.append(R"~~~( +static PluralCategory @method@([[maybe_unused]] PluralOperands ops) +{)~~~"); + + for (auto [category, condition] : rules) { + condition.generate_precomputed_variables(generator, generated_variables); + + generator.append(R"~~~( + if ()~~~"); + + generator.set("category"sv, format_identifier({}, category)); + condition.generate_condition(generator); + + generator.append(R"~~~() + return PluralCategory::@category@;)~~~"); + } + + generator.append(R"~~~( + return PluralCategory::Other; +} +)~~~"); + }; + + auto append_lookup_table = [&](auto form) { + generator.set("form"sv, form); + generator.set("size"sv, String::number(locales.size())); + + generator.append(R"~~~( +static constexpr Array s_@form@_functions { {)~~~"); + + for (auto const& locale : locales) { + auto& rules = locale_data.locales.find(locale)->value; + + if (rules.rules_for_form(form).is_empty()) { + generator.append(R"~~~( + default_category,)~~~"); + } else { + generator.set("method"sv, Locale::generated_method_name(form, locale)); + generator.append(R"~~~( + @method@,)~~~"); + } + } + + generator.append(R"~~~( +} }; +)~~~"); + }; + + append_string_conversions("PluralCategory"sv, "plural_category"sv, locale_data.categories); + + 
for (auto [locale, rules] : locale_data.locales) { + append_rules("cardinal"sv, locale, rules.cardinal_rules); + append_rules("ordinal"sv, locale, rules.ordinal_rules); + } + + append_lookup_table("cardinal"sv); + append_lookup_table("ordinal"sv); + + generator.append(R"~~~( +PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands) +{ + auto locale_value = locale_from_string(locale); + if (!locale_value.has_value()) + return PluralCategory::Other; + + auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. + PluralCategoryFunction decider { nullptr }; + + switch (form) { + case PluralForm::Cardinal: + decider = s_cardinal_functions[locale_index]; + break; + case PluralForm::Ordinal: + decider = s_ordinal_functions[locale_index]; + break; + } + + return decider(move(operands)); +} + +} +)~~~"); + + TRY(file.write(generator.as_string_view().bytes())); + return {}; +} + +ErrorOr serenity_main(Main::Arguments arguments) +{ + StringView generated_header_path; + StringView generated_implementation_path; + StringView core_path; + StringView locale_names_path; + + Core::ArgsParser args_parser; + args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); + args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); + args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); + args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path"); + args_parser.parse(arguments); + + auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write)); + auto generated_implementation_file = TRY(open_file(generated_implementation_path, 
Core::Stream::OpenMode::Write)); + + UnicodeLocaleData locale_data; + TRY(parse_all_locales(core_path, locale_names_path, locale_data)); + + TRY(generate_unicode_locale_header(*generated_header_file, locale_data)); + TRY(generate_unicode_locale_implementation(*generated_implementation_file, locale_data)); + + return 0; +} diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index 4f9c5527b3..701489311d 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -3,7 +3,7 @@ include(${SerenityOS_SOURCE_DIR}/Meta/CMake/unicode_data.cmake) if (DEFINED UNICODE_DATA_SOURCES) set(SOURCES ${UNICODE_DATA_SOURCES}) serenity_lib(LibUnicodeData unicodedata) - target_compile_options(LibUnicodeData PRIVATE -g0 -Os) + target_compile_options(LibUnicodeData PRIVATE -g0 -Os -Wno-parentheses-equality) target_link_libraries(LibUnicodeData LibCore LibTimeZone) endif() @@ -13,6 +13,7 @@ set(SOURCES DateTimeFormat.cpp Locale.cpp NumberFormat.cpp + PluralRules.cpp RelativeTimeFormat.cpp ) diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h index 4edea7b0f5..252da3bab5 100644 --- a/Userland/Libraries/LibUnicode/Forward.h +++ b/Userland/Libraries/LibUnicode/Forward.h @@ -38,6 +38,7 @@ enum class Locale : u16; enum class MinimumDaysRegion : u8; enum class Month : u8; enum class NumericSymbol : u8; +enum class PluralCategory : u8; enum class Property : u8; enum class Script : u8; enum class ScriptTag : u8; @@ -62,6 +63,7 @@ struct LocaleID; struct NumberFormat; struct NumberGroupings; struct OtherExtension; +struct PluralOperands; struct SpecialCasing; struct TransformedExtension; struct TransformedField; diff --git a/Userland/Libraries/LibUnicode/PluralRules.cpp b/Userland/Libraries/LibUnicode/PluralRules.cpp new file mode 100644 index 0000000000..a4b8a58a7f --- /dev/null +++ b/Userland/Libraries/LibUnicode/PluralRules.cpp @@ -0,0 +1,59 @@ +/* + 
* Copyright (c) 2022, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +#if ENABLE_UNICODE_DATA +# include +#endif + +namespace Unicode { + +#if !ENABLE_UNICODE_DATA +enum class PluralCategory : u8 { + Other, +}; +#endif + +PluralForm plural_form_from_string(StringView plural_form) +{ + if (plural_form == "cardinal"sv) + return PluralForm::Cardinal; + if (plural_form == "ordinal"sv) + return PluralForm::Ordinal; + VERIFY_NOT_REACHED(); +} + +StringView plural_form_to_string(PluralForm plural_form) +{ + switch (plural_form) { + case PluralForm::Cardinal: + return "cardinal"sv; + case PluralForm::Ordinal: + return "ordinal"sv; + default: + VERIFY_NOT_REACHED(); + } +} + +Optional __attribute__((weak)) plural_category_from_string(StringView category) +{ + VERIFY(category == "other"sv); + return PluralCategory::Other; +} + +StringView __attribute__((weak)) plural_category_to_string(PluralCategory category) +{ + VERIFY(category == PluralCategory::Other); + return "other"sv; +} + +PluralCategory __attribute__((weak)) determine_plural_category(StringView, PluralForm, PluralOperands) +{ + return PluralCategory::Other; +} + +} diff --git a/Userland/Libraries/LibUnicode/PluralRules.h b/Userland/Libraries/LibUnicode/PluralRules.h new file mode 100644 index 0000000000..d67f95b805 --- /dev/null +++ b/Userland/Libraries/LibUnicode/PluralRules.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include + +namespace Unicode { + +enum class PluralForm { + Cardinal, + Ordinal, +}; + +// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Operand_Meanings +struct PluralOperands { + static constexpr StringView symbol_to_variable_name(char symbol) + { + if (symbol == 'n') + return "number"sv; + if (symbol == 'i') + return "integer_digits"sv; + if (symbol == 'f') + return "fraction_digits"sv; + if (symbol == 'v') + return 
"number_of_fraction_digits"sv; + if (symbol == 't') + return "fraction_digits_without_trailing"sv; + if (symbol == 'w') + return "number_of_fraction_digits_without_trailing"sv; + VERIFY_NOT_REACHED(); + } + + static constexpr bool symbol_requires_floating_point_modulus(char symbol) + { + // From TR-35: "The modulus (% or mod) is a remainder operation as defined in Java; for + // example, where n = 4.3 the result of n mod 3 is 1.3." + // + // So, this returns whether the symbol represents a decimal value, and thus requires fmod. + return symbol == 'n'; + } + + double number { 0 }; + u64 integer_digits { 0 }; + u64 fraction_digits { 0 }; + u64 number_of_fraction_digits { 0 }; + u64 fraction_digits_without_trailing { 0 }; + u64 number_of_fraction_digits_without_trailing { 0 }; +}; + +PluralForm plural_form_from_string(StringView plural_form); +StringView plural_form_to_string(PluralForm plural_form); + +Optional plural_category_from_string(StringView category); +StringView plural_category_to_string(PluralCategory category); + +PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands); + +}