From c65dea64bd9adaef6955981c5f633cca485218aa Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sat, 13 Nov 2021 10:15:33 -0500 Subject: [PATCH] LibJS+LibUnicode: Don't remove {currency} keys in GetNumberFormatPattern In order to implement Intl.NumberFormat.prototype.formatToParts, do not replace {currency} keys in the format pattern before ECMA-402 tells us to. Otherwise, the array return by formatToParts will not contain the expected currency key. Early replacement was done to avoid resolving the currency display more than once, as it involves a couple of round trips to search through LibUnicode data. So this adds a non-standard method to NumberFormat to do this resolution and cache the result. Another side effect of this change is that LibUnicode must replace unit format patterns of the form "{0} {1}" during code generation. These were previously skipped during code generation because LibJS would just replace the keys with the currency display at runtime. But now that the currency display injection is delayed, any {0} or {1} keys in the format pattern will cause PartitionNumberPattern to abort. --- .../GenerateUnicodeNumberFormat.cpp | 20 ++---- .../LibJS/Runtime/Intl/NumberFormat.cpp | 68 +++++++++++-------- .../LibJS/Runtime/Intl/NumberFormat.h | 4 ++ Userland/Libraries/LibUnicode/Locale.cpp | 14 ++-- Userland/Libraries/LibUnicode/Locale.h | 2 +- 5 files changed, 58 insertions(+), 50 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp index fb19c3ae95..0c828ba5bc 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp @@ -79,24 +79,23 @@ struct UnicodeLocaleData { Vector numeric_symbols; }; -static void parse_number_pattern(String pattern, UnicodeLocaleData& locale_data, NumberFormat& format, bool is_unit_pattern = false) +static void parse_number_pattern(String pattern, UnicodeLocaleData& locale_data, NumberFormat& format) { // https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns // https://cldr.unicode.org/translation/number-currency-formats/number-and-currency-patterns auto replace_patterns = [&](String pattern) { - if (is_unit_pattern) { - // Skip parsing unit patterns, which are of the form "{0} {1}", and do not contain any - // of the replacement patterns below. - return pattern; - } - static HashMap replacements = { + { "{0}"sv, "{number}"sv }, + { "{1}"sv, "{currency}"sv }, { "%"sv, "{percentSign}"sv }, { "+"sv, "{plusSign}"sv }, { "-"sv, "{minusSign}"sv }, { "ยค"sv, "{currency}"sv }, // U+00A4 Currency Sign }; + for (auto const& replacement : replacements) + pattern = pattern.replace(replacement.key, replacement.value, true); + if (auto start_number_index = pattern.find_any_of("#0"sv, String::SearchDirection::Forward); start_number_index.has_value()) { auto end_number_index = *start_number_index + 1; @@ -111,9 +110,6 @@ static void parse_number_pattern(String pattern, UnicodeLocaleData& locale_data, pattern.substring_view(end_number_index)); } - for (auto const& replacement : replacements) - pattern = pattern.replace(replacement.key, replacement.value, true); - return pattern; }; @@ -173,18 +169,16 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& return; NumberFormat format {}; - bool is_unit_pattern = false; if (auto type = split_key[0].template to_uint(); type.has_value()) { VERIFY(*type % 10 == 0); format.magnitude = static_cast(log10(*type)); } else { VERIFY(split_key[0] == "unitPattern"sv); - is_unit_pattern = true; } format.plurality = NumberFormat::plurality_from_string(split_key[2]); - parse_number_pattern(value.as_string(), locale_data, format, is_unit_pattern); + parse_number_pattern(value.as_string(), locale_data, format); result.append(move(format)); }); diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp index ad5831e949..a28152f347 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp @@ -68,6 +68,8 @@ StringView NumberFormat::style_string() const void NumberFormat::set_currency_display(StringView currency_display) { + m_resolved_currency_display.clear(); + if (currency_display == "code"sv) m_currency_display = CurrencyDisplay::Code; else if (currency_display == "symbol"sv) @@ -80,6 +82,34 @@ void NumberFormat::set_currency_display(StringView currency_display) VERIFY_NOT_REACHED(); } +StringView NumberFormat::resolve_currency_display() +{ + if (m_resolved_currency_display.has_value()) + return *m_resolved_currency_display; + + switch (currency_display()) { + case NumberFormat::CurrencyDisplay::Code: + m_resolved_currency_display = currency(); + break; + case NumberFormat::CurrencyDisplay::Symbol: + m_resolved_currency_display = Unicode::get_locale_currency_mapping(data_locale(), currency(), Unicode::Style::Short); + break; + case NumberFormat::CurrencyDisplay::NarrowSymbol: + m_resolved_currency_display = Unicode::get_locale_currency_mapping(data_locale(), currency(), Unicode::Style::Narrow); + break; + case NumberFormat::CurrencyDisplay::Name: + m_resolved_currency_display = Unicode::get_locale_currency_mapping(data_locale(), currency(), Unicode::Style::Numeric); + break; + default: + VERIFY_NOT_REACHED(); + } + + if (!m_resolved_currency_display.has_value()) + m_resolved_currency_display = currency(); + + return *m_resolved_currency_display; +} + StringView NumberFormat::currency_display_string() const { VERIFY(m_currency_display.has_value()); @@ -664,8 +694,12 @@ Vector partition_number_pattern(NumberFormat& number_format, d // j. Else if p is equal to "currencyPrefix" and numberFormat.[[Style]] is "currency", then // k. Else if p is equal to "currencySuffix" and numberFormat.[[Style]] is "currency", then // - // Note: Our implementation formats currency codes during GetNumberFormatPattern so that we - // do not have to do currency display / plurality lookups more than once. + // Note: Our implementation manipulates the format string to inject/remove spacing around the + // currency code during GetNumberFormatPattern so that we do not have to do currency + // display / plurality lookups more than once. + else if ((part == "currency"sv) && (number_format.style() == NumberFormat::Style::Currency)) { + result.append({ "currency"sv, number_format.resolve_currency_display() }); + } // l. Else, else { @@ -1323,33 +1357,9 @@ Optional> get_number_format_pattern(NumberFormat& nu // we might need to mutate the format pattern to inject a space between the currency display and // the currency number. if (number_format.style() == NumberFormat::Style::Currency) { - if (number_format.currency_display() == NumberFormat::CurrencyDisplay::Name) { - auto maybe_currency_display = Unicode::get_locale_currency_mapping(number_format.data_locale(), number_format.currency(), Unicode::Style::Numeric); - auto currency_display = maybe_currency_display.value_or(number_format.currency()); - - return pattern.replace("{0}"sv, "{number}"sv).replace("{1}"sv, currency_display); - } - - Optional currency_display; - - switch (number_format.currency_display()) { - case NumberFormat::CurrencyDisplay::Code: - currency_display = number_format.currency(); - break; - case NumberFormat::CurrencyDisplay::Symbol: - currency_display = Unicode::get_locale_currency_mapping(number_format.data_locale(), number_format.currency(), Unicode::Style::Short); - break; - case NumberFormat::CurrencyDisplay::NarrowSymbol: - currency_display = Unicode::get_locale_currency_mapping(number_format.data_locale(), number_format.currency(), Unicode::Style::Narrow); - break; - default: - VERIFY_NOT_REACHED(); - } - - if (!currency_display.has_value()) - currency_display = number_format.currency(); - - return Unicode::create_currency_format_pattern(*currency_display, pattern); + auto modified_pattern = Unicode::augment_currency_format_pattern(number_format.resolve_currency_display(), pattern); + if (modified_pattern.has_value()) + return modified_pattern.release_value(); } // 16. Return pattern. diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h index 32719d63a3..62a90fdb63 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h @@ -97,6 +97,7 @@ public: CurrencyDisplay currency_display() const { return *m_currency_display; } StringView currency_display_string() const; void set_currency_display(StringView currency_display); + StringView resolve_currency_display(); bool has_currency_sign() const { return m_currency_sign.has_value(); } CurrencySign currency_sign() const { return *m_currency_sign; } @@ -177,6 +178,9 @@ private: Optional m_compact_display {}; // [[CompactDisplay]] SignDisplay m_sign_display { SignDisplay::Invalid }; // [[SignDisplay]] NativeFunction* m_bound_format { nullptr }; // [[BoundFormat]] + + // Non-standard. Stores the resolved currency display string based on [[Locale]], [[Currency]], and [[CurrencyDisplay]]. + Optional m_resolved_currency_display; }; struct FormatResult { diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp index 411fe1d0ca..2080783399 100644 --- a/Userland/Libraries/LibUnicode/Locale.cpp +++ b/Userland/Libraries/LibUnicode/Locale.cpp @@ -997,7 +997,7 @@ Optional select_currency_unit_pattern(StringView locale, StringVie } // https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies -String create_currency_format_pattern(StringView currency_display, StringView base_pattern) +Optional augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern) { #if ENABLE_UNICODE_DATA constexpr auto number_key = "{number}"sv; @@ -1011,7 +1011,7 @@ String create_currency_format_pattern(StringView currency_display, StringView ba VERIFY(currency_index.has_value()); Utf8View utf8_currency_display { currency_display }; - Optional currency_display_with_spacing; + Optional currency_key_with_spacing; auto last_code_point = [](StringView string) { Utf8View utf8_string { string }; @@ -1030,7 +1030,7 @@ String create_currency_format_pattern(StringView currency_display, StringView ba u32 first_currency_code_point = *utf8_currency_display.begin(); if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol)) - currency_display_with_spacing = String::formatted("{}{}", spacing, currency_display); + currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key); } } else { u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index)); @@ -1039,15 +1039,15 @@ String create_currency_format_pattern(StringView currency_display, StringView ba u32 last_currency_code_point = last_code_point(currency_display); if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol)) - currency_display_with_spacing = String::formatted("{}{}", currency_display, spacing); + currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing); } } - if (currency_display_with_spacing.has_value()) - return base_pattern.replace(currency_key, *currency_display_with_spacing); + if (currency_key_with_spacing.has_value()) + return base_pattern.replace(currency_key, *currency_key_with_spacing); #endif - return base_pattern.replace(currency_key, currency_display); + return {}; } String LanguageID::to_string() const diff --git a/Userland/Libraries/LibUnicode/Locale.h b/Userland/Libraries/LibUnicode/Locale.h index f205739a81..9e328b2738 100644 --- a/Userland/Libraries/LibUnicode/Locale.h +++ b/Userland/Libraries/LibUnicode/Locale.h @@ -196,6 +196,6 @@ Optional remove_likely_subtags(LanguageID const& language_id); String resolve_most_likely_territory(LanguageID const& language_id, StringView territory_alias); Optional select_currency_unit_pattern(StringView locale, StringView system, double number); -String create_currency_format_pattern(StringView currency_display, StringView base_pattern); +Optional augment_currency_format_pattern(StringView currency_display, StringView base_pattern); }