From 1f546476d59b59a69b1ae0fa9101b405e35034b5 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Mon, 15 Nov 2021 07:56:20 -0500 Subject: [PATCH] LibJS+LibUnicode: Fix computation of compact pattern exponents The compact scale of each formatting rule was precomputed in commit be69eae651abf0cc3e9cd0906f9586fdfbfb68ef using the formula: compact scale = magnitude - pattern scale. This computation was off by one. For example, consider the format key "10000-count-one", which maps to "00 thousand" in en-US. What we are really after is the exponent that best represents the string "thousand" for values of at least 10000 and less than 100000 (the next format key). We were previously doing: log10(10000) - "00 thousand".count("0") = 2, which clearly isn't what we want. Instead, if we do: log10(10000) + 1 - "00 thousand".count("0") = 3, we get the correct exponent for each format key for each locale. This commit also renames the generated variable from "compact_scale" to "exponent" to match the terminology used in ECMA-402. 
--- .../GenerateUnicodeNumberFormat.cpp | 42 +++++++++---------- .../LibJS/Runtime/Intl/NumberFormat.cpp | 2 +- Userland/Libraries/LibUnicode/Locale.h | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp index b90cf274da..544693b91f 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp @@ -90,10 +90,12 @@ struct UnicodeLocaleData { Vector numeric_symbols; }; -static void parse_number_pattern(String pattern, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr) +static void parse_number_pattern(Vector patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr) { // https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns // https://cldr.unicode.org/translation/number-currency-formats/number-and-currency-patterns + VERIFY((patterns.size() == 1) || (patterns.size() == 2)); + auto replace_patterns = [&](String pattern) { static HashMap replacements = { { "{0}"sv, "{number}"sv }, @@ -183,16 +185,6 @@ static void parse_number_pattern(String pattern, UnicodeLocaleData& locale_data, return pattern; }; - auto patterns = pattern.split(';'); - VERIFY((patterns.size() == 1) || (patterns.size() == 2)); - - if (format.magnitude != 0) { - auto number_of_zeroes_in_pattern = patterns[0].count("0"sv); - - VERIFY(format.magnitude >= number_of_zeroes_in_pattern); - format.compact_scale = format.magnitude - number_of_zeroes_in_pattern; - } - auto zero_format = replace_patterns(move(patterns[0])); format.positive_format_index = locale_data.unique_strings.ensure(String::formatted("{{plusSign}}{}", zero_format)); @@ -238,17 +230,25 @@ 
static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& if (split_key.size() != 3) return; + auto patterns = value.as_string().split(';'); NumberFormat format {}; if (auto type = split_key[0].template to_uint(); type.has_value()) { VERIFY(*type % 10 == 0); format.magnitude = static_cast(log10(*type)); + + if (patterns[0] != "0"sv) { + auto number_of_zeroes_in_pattern = patterns[0].count("0"sv); + VERIFY(format.magnitude >= number_of_zeroes_in_pattern); + + format.exponent = format.magnitude + 1 - number_of_zeroes_in_pattern; + } } else { VERIFY(split_key[0] == "unitPattern"sv); } format.plurality = NumberFormat::plurality_from_string(split_key[2]); - parse_number_pattern(value.as_string(), locale_data, NumberFormatType::Compact, format); + parse_number_pattern(move(patterns), locale_data, NumberFormatType::Compact, format); result.append(move(format)); }); @@ -279,7 +279,7 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& auto& number_system = ensure_number_system(system); auto format_object = value.as_object().get("standard"sv); - parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.decimal_format, &number_system); + parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.decimal_format, &number_system); auto const& long_format = value.as_object().get("long"sv).as_object().get("decimalFormat"sv); number_system.decimal_long_formats = parse_number_format(long_format.as_object()); @@ -291,10 +291,10 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& auto& number_system = ensure_number_system(system); auto format_object = value.as_object().get("standard"sv); - parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.currency_format); + parse_number_pattern(format_object.as_string().split(';'), locale_data, 
NumberFormatType::Standard, number_system.currency_format); format_object = value.as_object().get("accounting"sv); - parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.accounting_format); + parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.accounting_format); number_system.currency_unit_formats = parse_number_format(value.as_object()); @@ -307,13 +307,13 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& auto& number_system = ensure_number_system(system); auto format_object = value.as_object().get("standard"sv); - parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.percent_format); + parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.percent_format); } else if (key.starts_with(scientific_formats_prefix)) { auto system = key.substring(scientific_formats_prefix.length()); auto& number_system = ensure_number_system(system); auto format_object = value.as_object().get("standard"sv); - parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.scientific_format); + parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.scientific_format); } }); } @@ -424,7 +424,7 @@ struct NumberFormat { Unicode::NumberFormat number_format {}; number_format.magnitude = magnitude; - number_format.compact_scale = compact_scale; + number_format.exponent = exponent; number_format.plurality = static_cast(plurality); number_format.zero_format = s_string_list[zero_format]; number_format.positive_format = s_string_list[positive_format]; @@ -435,7 +435,7 @@ struct NumberFormat { } u8 magnitude { 0 }; - u8 compact_scale { 0 }; + u8 exponent { 0 }; u8 plurality { 0 }; @string_index_type@ zero_format { 0 }; @string_index_type@ 
positive_format { 0 }; @@ -466,13 +466,13 @@ struct NumberSystem { auto append_number_format = [&](auto const& number_format) { generator.set("magnitude"sv, String::number(number_format.magnitude)); - generator.set("compact_scale"sv, String::number(number_format.compact_scale)); + generator.set("exponent"sv, String::number(number_format.exponent)); generator.set("plurality"sv, String::number(static_cast(number_format.plurality))); generator.set("zero_format"sv, String::number(number_format.zero_format_index)); generator.set("positive_format"sv, String::number(number_format.positive_format_index)); generator.set("negative_format"sv, String::number(number_format.negative_format_index)); generator.set("compact_identifier"sv, String::number(number_format.compact_identifier_index)); - generator.append("{ @magnitude@, @compact_scale@, @plurality@, @zero_format@, @positive_format@, @negative_format@, @compact_identifier@ },"); + generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, @compact_identifier@ },"); }; auto append_number_formats = [&](String name, auto const& number_formats) { diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp index 492cf73991..05be1bffbf 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp @@ -1599,7 +1599,7 @@ int compute_exponent_for_magniude(NumberFormat& number_format, int magnitude) best_number_format = &format_rule; } - return best_number_format ? best_number_format->compact_scale : 0; + return best_number_format ? 
best_number_format->exponent : 0; } default: diff --git a/Userland/Libraries/LibUnicode/Locale.h b/Userland/Libraries/LibUnicode/Locale.h index 6530e30468..172a5bb48c 100644 --- a/Userland/Libraries/LibUnicode/Locale.h +++ b/Userland/Libraries/LibUnicode/Locale.h @@ -117,7 +117,7 @@ struct NumberFormat { }; u8 magnitude { 0 }; - u8 compact_scale { 0 }; + u8 exponent { 0 }; Plurality plurality { Plurality::Other }; StringView zero_format {}; StringView positive_format {};