diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake index b0c66d4edc..0adbed2d50 100644 --- a/Meta/CMake/unicode_data.cmake +++ b/Meta/CMake/unicode_data.cmake @@ -141,6 +141,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) set(UNICODE_NUMBER_FORMAT_HEADER LibUnicode/UnicodeNumberFormat.h) set(UNICODE_NUMBER_FORMAT_IMPLEMENTATION LibUnicode/UnicodeNumberFormat.cpp) + set(UNICODE_PLURAL_RULES_HEADER LibUnicode/UnicodePluralRules.h) + set(UNICODE_PLURAL_RULES_IMPLEMENTATION LibUnicode/UnicodePluralRules.cpp) + set(UNICODE_RELATIVE_TIME_FORMAT_HEADER LibUnicode/UnicodeRelativeTimeFormat.h) set(UNICODE_RELATIVE_TIME_FORMAT_IMPLEMENTATION LibUnicode/UnicodeRelativeTimeFormat.cpp) @@ -159,6 +162,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) set(UNICODE_NUMBER_FORMAT_HEADER UnicodeNumberFormat.h) set(UNICODE_NUMBER_FORMAT_IMPLEMENTATION UnicodeNumberFormat.cpp) + set(UNICODE_PLURAL_RULES_HEADER UnicodePluralRules.h) + set(UNICODE_PLURAL_RULES_IMPLEMENTATION UnicodePluralRules.cpp) + set(UNICODE_RELATIVE_TIME_FORMAT_HEADER UnicodeRelativeTimeFormat.h) set(UNICODE_RELATIVE_TIME_FORMAT_IMPLEMENTATION UnicodeRelativeTimeFormat.cpp) @@ -201,6 +207,15 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${UNICODE_NUMBER_FORMAT_IMPLEMENTATION}" arguments -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}" -u "${CLDR_UNITS_PATH}" ) + invoke_generator( + "UnicodePluralRules" + Lagom::GenerateUnicodePluralRules + "${CLDR_VERSION_FILE}" + "${UNICODE_META_TARGET_PREFIX}" + "${UNICODE_PLURAL_RULES_HEADER}" + "${UNICODE_PLURAL_RULES_IMPLEMENTATION}" + arguments -r "${CLDR_CORE_PATH}" -l "${CLDR_LOCALES_PATH}" + ) invoke_generator( "UnicodeRelativeTimeFormat" Lagom::GenerateUnicodeRelativeTimeFormat @@ -220,6 +235,8 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) ${UNICODE_LOCALE_IMPLEMENTATION} ${UNICODE_NUMBER_FORMAT_HEADER} ${UNICODE_NUMBER_FORMAT_IMPLEMENTATION} + ${UNICODE_PLURAL_RULES_HEADER} + ${UNICODE_PLURAL_RULES_IMPLEMENTATION} ${UNICODE_RELATIVE_TIME_FORMAT_HEADER} ${UNICODE_RELATIVE_TIME_FORMAT_IMPLEMENTATION} ) diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index bc47898f2a..228138acb3 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -503,6 +503,7 @@ if (BUILD_LAGOM) SOURCES ${LIBUNICODE_SOURCES} ${UNICODE_DATA_SOURCES} ) target_compile_definitions(LibUnicode PRIVATE ENABLE_UNICODE_DATA=$) + target_compile_options(LibUnicode PRIVATE -Wno-parentheses-equality) target_link_libraries(LibUnicode LibTimeZone) # WASM diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/CMakeLists.txt b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/CMakeLists.txt index 4fa8a22981..a343858d11 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/CMakeLists.txt +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/CMakeLists.txt @@ -2,4 +2,5 @@ lagom_tool(GenerateUnicodeData SOURCES GenerateUnicodeData.cpp LIBS LibMain) lagom_tool(GenerateUnicodeDateTimeFormat SOURCES GenerateUnicodeDateTimeFormat.cpp LIBS LibMain LibTimeZone) lagom_tool(GenerateUnicodeLocale SOURCES GenerateUnicodeLocale.cpp LIBS LibMain) lagom_tool(GenerateUnicodeNumberFormat SOURCES GenerateUnicodeNumberFormat.cpp LIBS LibMain) +lagom_tool(GenerateUnicodePluralRules SOURCES GenerateUnicodePluralRules.cpp LIBS LibMain) lagom_tool(GenerateUnicodeRelativeTimeFormat SOURCES GenerateUnicodeRelativeTimeFormat.cpp LIBS LibMain) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodePluralRules.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodePluralRules.cpp new file mode 100644 index 0000000000..3b8fdc818d --- /dev/null +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodePluralRules.cpp @@ -0,0 +1,569 @@ +/* + * Copyright (c) 2022, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "GeneratorUtil.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using StringIndexType = u16; + +static String format_identifier(StringView owner, String identifier) +{ + identifier = identifier.replace("-"sv, "_"sv, ReplaceMode::All); + + if (all_of(identifier, is_ascii_digit)) + return String::formatted("{}_{}", owner[0], identifier); + if (is_ascii_lower_alpha(identifier[0])) + return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1)); + return identifier; +} + +struct Relation { + using Range = Array; + using Comparator = Variant; + + enum class Type { + Equality, + Inequality, + }; + + String const& modulus_variable_name() const + { + VERIFY(modulus.has_value()); + + if (!cached_modulus_variable_name.has_value()) + cached_modulus_variable_name = String::formatted("mod_{}_{}", symbol, *modulus); + + return *cached_modulus_variable_name; + } + + String const& exponential_variable_name() const + { + if (!cached_exponential_variable_name.has_value()) + cached_exponential_variable_name = String::formatted("exp_{}", symbol); + + return *cached_exponential_variable_name; + } + + void generate_relation(SourceGenerator& generator) const + { + auto append_variable_name = [&]() { + if (modulus.has_value()) + generator.append(modulus_variable_name()); + else if (symbol == 'e' || symbol == 'c') + generator.append(exponential_variable_name()); + else + generator.append(String::formatted("ops.{}", Unicode::PluralOperands::symbol_to_variable_name(symbol))); + }; + + auto append_value = [&](u32 value) { + append_variable_name(); + generator.append(" == "sv); + generator.append(String::number(value)); + }; + + auto append_range = [&](auto const& range) { + // This check avoids generating "0 <= unsigned_value", which is always true. + if (range[0] != 0 || Unicode::PluralOperands::symbol_requires_floating_point_modulus(symbol)) { + generator.append(String::formatted("{} <= ", range[0])); + append_variable_name(); + generator.append(" && "sv); + } + + append_variable_name(); + generator.append(String::formatted(" <= {}", range[1])); + }; + + if (type == Type::Inequality) + generator.append("!"sv); + + generator.append("("sv); + + bool first = true; + for (auto const& comparator : comparators) { + generator.append(first ? "("sv : " || ("sv); + + comparator.visit( + [&](u32 value) { append_value(value); }, + [&](Range const& range) { append_range(range); }); + + generator.append(")"sv); + first = false; + } + + generator.append(")"sv); + } + + void generate_precomputed_variables(SourceGenerator& generator, HashTable& generated_variables) const + { + // FIXME: How do we handle the exponential symbols? They seem unused by ECMA-402. + if (symbol == 'e' || symbol == 'c') { + if (auto variable = exponential_variable_name(); !generated_variables.contains(variable)) { + generated_variables.set(variable); + generator.set("variable"sv, move(variable)); + generator.append(R"~~~( + auto @variable@ = 0;)~~~"); + } + } + + if (!modulus.has_value()) + return; + + auto variable = modulus_variable_name(); + if (generated_variables.contains(variable)) + return; + + generated_variables.set(variable); + generator.set("variable"sv, move(variable)); + generator.set("operand"sv, Unicode::PluralOperands::symbol_to_variable_name(symbol)); + generator.set("modulus"sv, String::number(*modulus)); + + if (Unicode::PluralOperands::symbol_requires_floating_point_modulus(symbol)) { + generator.append(R"~~~( + auto @variable@ = fmod(ops.@operand@, @modulus@);)~~~"); + } else { + generator.append(R"~~~( + auto @variable@ = ops.@operand@ % @modulus@;)~~~"); + } + } + + Type type; + char symbol { 0 }; + Optional modulus; + Vector comparators; + +private: + mutable Optional cached_modulus_variable_name; + mutable Optional cached_exponential_variable_name; +}; + +struct Condition { + void generate_condition(SourceGenerator& generator) const + { + for (size_t i = 0; i < relations.size(); ++i) { + if (i > 0) + generator.append(" || "sv); + + auto const& conjunctions = relations[i]; + if (conjunctions.size() > 1) + generator.append("("sv); + + for (size_t j = 0; j < conjunctions.size(); ++j) { + if (j > 0) + generator.append(" && "sv); + conjunctions[j].generate_relation(generator); + } + + if (conjunctions.size() > 1) + generator.append(")"sv); + } + } + + void generate_precomputed_variables(SourceGenerator& generator, HashTable& generated_variables) const + { + for (auto const& conjunctions : relations) { + for (auto const& relation : conjunctions) + relation.generate_precomputed_variables(generator, generated_variables); + } + } + + Vector> relations; +}; + +struct Locale { + static String generated_method_name(StringView form, StringView locale) + { + return String::formatted("{}_plurality_{}", form, format_identifier({}, locale)); + } + + HashMap& rules_for_form(StringView form) + { + if (form == "cardinal") + return cardinal_rules; + if (form == "ordinal") + return ordinal_rules; + VERIFY_NOT_REACHED(); + } + + HashMap cardinal_rules; + HashMap ordinal_rules; +}; + +struct UnicodeLocaleData { + UniqueStringStorage unique_strings; + + HashMap locales; + Vector categories; +}; + +static Relation parse_relation(StringView relation) +{ + static constexpr auto equality_operator = " = "sv; + static constexpr auto inequality_operator = " != "sv; + static constexpr auto modulus_operator = " % "sv; + static constexpr auto range_operator = ".."sv; + static constexpr auto set_operator = ','; + + Relation parsed; + + StringView lhs; + StringView rhs; + + if (auto index = relation.find(equality_operator); index.has_value()) { + parsed.type = Relation::Type::Equality; + lhs = relation.substring_view(0, *index); + rhs = relation.substring_view(*index + equality_operator.length()); + } else if (auto index = relation.find(inequality_operator); index.has_value()) { + parsed.type = Relation::Type::Inequality; + lhs = relation.substring_view(0, *index); + rhs = relation.substring_view(*index + inequality_operator.length()); + } else { + VERIFY_NOT_REACHED(); + } + + if (auto index = lhs.find(modulus_operator); index.has_value()) { + auto symbol = lhs.substring_view(0, *index); + VERIFY(symbol.length() == 1); + + auto modulus = lhs.substring_view(*index + modulus_operator.length()).to_uint(); + VERIFY(modulus.has_value()); + + parsed.symbol = symbol[0]; + parsed.modulus = move(modulus); + } else { + VERIFY(lhs.length() == 1); + parsed.symbol = lhs[0]; + } + + rhs.for_each_split_view(set_operator, false, [&](auto set) { + if (auto index = set.find(range_operator); index.has_value()) { + auto range_begin = set.substring_view(0, *index).to_uint(); + VERIFY(range_begin.has_value()); + + auto range_end = set.substring_view(*index + range_operator.length()).to_uint(); + VERIFY(range_end.has_value()); + + parsed.comparators.empend(Array { *range_begin, *range_end }); + } else { + auto value = set.to_uint(); + VERIFY(value.has_value()); + + parsed.comparators.empend(*value); + } + }); + + return parsed; +} + +// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax +// +// A very simplified view of a plural rule is: +// +// condition.* ([@integer|@decimal] sample)+ +// +// The "sample" being series of integer or decimal values that fit the specified condition. The +// condition may be one or more binary expressions, chained together with "and" or "or" operators. +static void parse_condition(StringView category, StringView rule, HashMap& rules) +{ + static constexpr auto other_category = "other"sv; + static constexpr auto disjunction_keyword = " or "sv; + static constexpr auto conjunction_keyword = " and "sv; + + // We don't need the examples in the generated code, so we can drop them here. + auto example_index = rule.find('@'); + VERIFY(example_index.has_value()); + + auto condition = rule.substring_view(0, *example_index).trim_whitespace(); + + // Our implementation does not generate rules for the "other" category. We simply return "other" + // for values that do not match any rules. This will need to be revisited if this VERIFY fails. + if (condition.is_empty()) { + VERIFY(category == other_category); + return; + } + + auto& relation_list = rules.ensure(category); + + // The grammar for a condition (i.e. a chain of relations) is: + // + // condition = and_condition ('or' and_condition)* + // and_condition = relation ('and' relation)* + // + // This affords some simplicity in that disjunctions are never embedded within a conjunction. + condition.for_each_split_view(disjunction_keyword, false, [&](auto disjunction) { + Vector conjunctions; + + disjunction.for_each_split_view(conjunction_keyword, false, [&](auto relation) { + conjunctions.append(parse_relation(relation)); + }); + + relation_list.relations.append(move(conjunctions)); + }); +} + +static ErrorOr parse_plural_rules(String core_supplemental_path, StringView file_name, UnicodeLocaleData& locale_data) +{ + static constexpr auto form_prefix = "plurals-type-"sv; + static constexpr auto rule_prefix = "pluralRule-count-"sv; + + LexicalPath plurals_path(move(core_supplemental_path)); + plurals_path = plurals_path.append(file_name); + + auto plurals = TRY(read_json_file(plurals_path.string())); + auto const& supplemental_object = plurals.as_object().get("supplemental"sv); + + supplemental_object.as_object().for_each_member([&](auto const& key, auto const& plurals_object) { + if (!key.starts_with(form_prefix)) + return; + + auto form = key.substring_view(form_prefix.length()); + + plurals_object.as_object().for_each_member([&](auto const& loc, auto const& rules) { + auto locale = locale_data.locales.get(loc); + if (!locale.has_value()) + return; + + rules.as_object().for_each_member([&](auto const& key, auto const& condition) { + VERIFY(key.starts_with(rule_prefix)); + + auto category = key.substring_view(rule_prefix.length()); + parse_condition(category, condition.as_string(), locale->rules_for_form(form)); + + if (!locale_data.categories.contains_slow(category)) + locale_data.categories.append(category); + }); + }); + }); + + return {}; +} + +static ErrorOr parse_all_locales(String core_path, String locale_names_path, UnicodeLocaleData& locale_data) +{ + auto identity_iterator = TRY(path_to_dir_iterator(move(locale_names_path))); + + LexicalPath core_supplemental_path(move(core_path)); + core_supplemental_path = core_supplemental_path.append("supplemental"sv); + VERIFY(Core::File::is_directory(core_supplemental_path.string())); + + auto remove_variants_from_path = [&](String path) -> ErrorOr { + auto parsed_locale = TRY(CanonicalLanguageID::parse(locale_data.unique_strings, LexicalPath::basename(path))); + + StringBuilder builder; + builder.append(locale_data.unique_strings.get(parsed_locale.language)); + if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty()) + builder.appendff("-{}", script); + if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty()) + builder.appendff("-{}", region); + + return builder.build(); + }; + + while (identity_iterator.has_next()) { + auto locale_path = TRY(next_path_from_dir_iterator(identity_iterator)); + auto language = TRY(remove_variants_from_path(locale_path)); + + locale_data.locales.ensure(language); + } + + TRY(parse_plural_rules(core_supplemental_path.string(), "plurals.json"sv, locale_data)); + TRY(parse_plural_rules(core_supplemental_path.string(), "ordinals.json"sv, locale_data)); + return {}; +} + +static ErrorOr generate_unicode_locale_header(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data) +{ + StringBuilder builder; + SourceGenerator generator { builder }; + + generator.append(R"~~~( +#include + +#pragma once + +namespace Unicode { +)~~~"); + + generate_enum(generator, format_identifier, "PluralCategory"sv, {}, locale_data.categories); + + generator.append(R"~~~( +} +)~~~"); + + TRY(file.write(generator.as_string_view().bytes())); + return {}; +} + +static ErrorOr generate_unicode_locale_implementation(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data) +{ + StringBuilder builder; + SourceGenerator generator { builder }; + + auto locales = locale_data.locales.keys(); + quick_sort(locales); + + generator.append(R"~~~( +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Unicode { + +using PluralCategoryFunction = PluralCategory(*)(PluralOperands); + +static PluralCategory default_category(PluralOperands) +{ + return PluralCategory::Other; +} + +)~~~"); + + auto append_string_conversions = [&](StringView enum_title, StringView enum_snake, auto const& values) { + HashValueMap hashes; + hashes.ensure_capacity(values.size()); + + for (auto const& value : values) + hashes.set(value.hash(), format_identifier(enum_title, value)); + + generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes)); + generate_value_to_string(generator, "{}_to_string"sv, enum_title, enum_snake, format_identifier, values); + }; + + auto append_rules = [&](auto form, auto const& locale, auto const& rules) { + if (rules.is_empty()) + return; + + generator.set("method"sv, Locale::generated_method_name(form, locale)); + HashTable generated_variables; + + generator.append(R"~~~( +static PluralCategory @method@([[maybe_unused]] PluralOperands ops) +{)~~~"); + + for (auto [category, condition] : rules) { + condition.generate_precomputed_variables(generator, generated_variables); + + generator.append(R"~~~( + if ()~~~"); + + generator.set("category"sv, format_identifier({}, category)); + condition.generate_condition(generator); + + generator.append(R"~~~() + return PluralCategory::@category@;)~~~"); + } + + generator.append(R"~~~( + return PluralCategory::Other; +} +)~~~"); + }; + + auto append_lookup_table = [&](auto form) { + generator.set("form"sv, form); + generator.set("size"sv, String::number(locales.size())); + + generator.append(R"~~~( +static constexpr Array s_@form@_functions { {)~~~"); + + for (auto const& locale : locales) { + auto& rules = locale_data.locales.find(locale)->value; + + if (rules.rules_for_form(form).is_empty()) { + generator.append(R"~~~( + default_category,)~~~"); + } else { + generator.set("method"sv, Locale::generated_method_name(form, locale)); + generator.append(R"~~~( + @method@,)~~~"); + } + } + + generator.append(R"~~~( +} }; +)~~~"); + }; + + append_string_conversions("PluralCategory"sv, "plural_category"sv, locale_data.categories); + + for (auto [locale, rules] : locale_data.locales) { + append_rules("cardinal"sv, locale, rules.cardinal_rules); + append_rules("ordinal"sv, locale, rules.ordinal_rules); + } + + append_lookup_table("cardinal"sv); + append_lookup_table("ordinal"sv); + + generator.append(R"~~~( +PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands) +{ + auto locale_value = locale_from_string(locale); + if (!locale_value.has_value()) + return PluralCategory::Other; + + auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. + PluralCategoryFunction decider { nullptr }; + + switch (form) { + case PluralForm::Cardinal: + decider = s_cardinal_functions[locale_index]; + break; + case PluralForm::Ordinal: + decider = s_ordinal_functions[locale_index]; + break; + } + + return decider(move(operands)); +} + +} +)~~~"); + + TRY(file.write(generator.as_string_view().bytes())); + return {}; +} + +ErrorOr serenity_main(Main::Arguments arguments) +{ + StringView generated_header_path; + StringView generated_implementation_path; + StringView core_path; + StringView locale_names_path; + + Core::ArgsParser args_parser; + args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); + args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); + args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); + args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path"); + args_parser.parse(arguments); + + auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write)); + auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write)); + + UnicodeLocaleData locale_data; + TRY(parse_all_locales(core_path, locale_names_path, locale_data)); + + TRY(generate_unicode_locale_header(*generated_header_file, locale_data)); + TRY(generate_unicode_locale_implementation(*generated_implementation_file, locale_data)); + + return 0; +} diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index 4f9c5527b3..701489311d 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -3,7 +3,7 @@ include(${SerenityOS_SOURCE_DIR}/Meta/CMake/unicode_data.cmake) if (DEFINED UNICODE_DATA_SOURCES) set(SOURCES ${UNICODE_DATA_SOURCES}) serenity_lib(LibUnicodeData unicodedata) - target_compile_options(LibUnicodeData PRIVATE -g0 -Os) + target_compile_options(LibUnicodeData PRIVATE -g0 -Os -Wno-parentheses-equality) target_link_libraries(LibUnicodeData LibCore LibTimeZone) endif() @@ -13,6 +13,7 @@ set(SOURCES DateTimeFormat.cpp Locale.cpp NumberFormat.cpp + PluralRules.cpp RelativeTimeFormat.cpp ) diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h index 4edea7b0f5..252da3bab5 100644 --- a/Userland/Libraries/LibUnicode/Forward.h +++ b/Userland/Libraries/LibUnicode/Forward.h @@ -38,6 +38,7 @@ enum class Locale : u16; enum class MinimumDaysRegion : u8; enum class Month : u8; enum class NumericSymbol : u8; +enum class PluralCategory : u8; enum class Property : u8; enum class Script : u8; enum class ScriptTag : u8; @@ -62,6 +63,7 @@ struct LocaleID; struct NumberFormat; struct NumberGroupings; struct OtherExtension; +struct PluralOperands; struct SpecialCasing; struct TransformedExtension; struct TransformedField; diff --git a/Userland/Libraries/LibUnicode/PluralRules.cpp b/Userland/Libraries/LibUnicode/PluralRules.cpp new file mode 100644 index 0000000000..a4b8a58a7f --- /dev/null +++ b/Userland/Libraries/LibUnicode/PluralRules.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +#if ENABLE_UNICODE_DATA +# include +#endif + +namespace Unicode { + +#if !ENABLE_UNICODE_DATA +enum class PluralCategory : u8 { + Other, +}; +#endif + +PluralForm plural_form_from_string(StringView plural_form) +{ + if (plural_form == "cardinal"sv) + return PluralForm::Cardinal; + if (plural_form == "ordinal"sv) + return PluralForm::Ordinal; + VERIFY_NOT_REACHED(); +} + +StringView plural_form_to_string(PluralForm plural_form) +{ + switch (plural_form) { + case PluralForm::Cardinal: + return "cardinal"sv; + case PluralForm::Ordinal: + return "ordinal"sv; + default: + VERIFY_NOT_REACHED(); + } +} + +Optional __attribute__((weak)) plural_category_from_string(StringView category) +{ + VERIFY(category == "other"sv); + return PluralCategory::Other; +} + +StringView __attribute__((weak)) plural_category_to_string(PluralCategory category) +{ + VERIFY(category == PluralCategory::Other); + return "other"sv; +} + +PluralCategory __attribute__((weak)) determine_plural_category(StringView, PluralForm, PluralOperands) +{ + return PluralCategory::Other; +} + +} diff --git a/Userland/Libraries/LibUnicode/PluralRules.h b/Userland/Libraries/LibUnicode/PluralRules.h new file mode 100644 index 0000000000..d67f95b805 --- /dev/null +++ b/Userland/Libraries/LibUnicode/PluralRules.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include + +namespace Unicode { + +enum class PluralForm { + Cardinal, + Ordinal, +}; + +// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Operand_Meanings +struct PluralOperands { + static constexpr StringView symbol_to_variable_name(char symbol) + { + if (symbol == 'n') + return "number"sv; + if (symbol == 'i') + return "integer_digits"sv; + if (symbol == 'f') + return "fraction_digits"sv; + if (symbol == 'v') + return "number_of_fraction_digits"sv; + if (symbol == 't') + return "fraction_digits_without_trailing"sv; + if (symbol == 'w') + return "number_of_fraction_digits_without_trailing"sv; + VERIFY_NOT_REACHED(); + } + + static constexpr bool symbol_requires_floating_point_modulus(char symbol) + { + // From TR-35: "The modulus (% or mod) is a remainder operation as defined in Java; for + // example, where n = 4.3 the result of n mod 3 is 1.3." + // + // So, this returns whether the symbol represents a decimal value, and thus requires fmod. + return symbol == 'n'; + } + + double number { 0 }; + u64 integer_digits { 0 }; + u64 fraction_digits { 0 }; + u64 number_of_fraction_digits { 0 }; + u64 fraction_digits_without_trailing { 0 }; + u64 number_of_fraction_digits_without_trailing { 0 }; +}; + +PluralForm plural_form_from_string(StringView plural_form); +StringView plural_form_to_string(PluralForm plural_form); + +Optional plural_category_from_string(StringView category); +StringView plural_category_to_string(PluralCategory category); + +PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands); + +}