1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-06-01 10:28:13 +00:00

LibJS+LibUnicode: Fully implement currency number formatting

Currencies are a bit strange; the layout of currency data in the CLDR is
not particularly compatible with what ECMA-402 expects. For example, the
currency formats in the "en" and "ar" locales for the Latin script are:

    en: "¤#,##0.00"
    ar: "¤\u00A0#,##0.00"

Note how the "ar" locale has a non-breaking space after the currency
symbol (¤), but "en" does not. This does not mean that this space will
appear in the "ar"-formatted string, nor does it mean that a space won't
appear in the "en"-formatted string. This is a runtime decision based on
the currency display chosen by the user ("$" vs. "USD" vs. "US dollar")
and other rules in the Unicode TR-35 spec.

ECMA-402 shies away from the nuances here with "implementation-defined"
steps. LibUnicode will store the data parsed from the CLDR however it is
presented; making decisions about spacing, etc. will occur at runtime
based on user input.
This commit is contained in:
Timothy Flynn 2021-11-12 23:16:37 -05:00 committed by Linus Groh
parent e9493a2cd5
commit a701ed52fc
6 changed files with 454 additions and 30 deletions

View file

@ -8,6 +8,8 @@
#include <AK/GenericLexer.h>
#include <AK/QuickSort.h>
#include <AK/StringBuilder.h>
#include <AK/Utf8View.h>
#include <LibUnicode/CharacterTypes.h>
#include <LibUnicode/Locale.h>
#if ENABLE_UNICODE_DATA
@ -963,6 +965,79 @@ String resolve_most_likely_territory([[maybe_unused]] LanguageID const& language
return aliases[0].to_string();
}
// Selects the currency-unit number format of the given locale and numbering system that
// matches the plurality of `number`. Returns the "Other" plurality format when the preferred
// plurality has no entry, and an empty Optional when neither is present.
Optional<NumberFormat> select_currency_unit_pattern(StringView locale, StringView system, double number)
{
    // FIXME: This is a rather naive and locale-unaware implementation of Unicode's TR-35 pluralization
    //        rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
    //        Once those rules are implemented for LibJS, we better use them instead.
    auto formats = get_compact_number_system_formats(locale, system, CompactNumberFormatType::CurrencyUnit);

    // Returns a copy of the first format carrying the requested plurality, if there is one.
    auto lookup = [&](auto plurality) -> Optional<NumberFormat> {
        auto match = formats.find_if([&](auto& candidate) { return candidate.plurality == plurality; });
        if (match == formats.end())
            return {};
        return *match;
    };

    // Exactly 0, 1 and 2 map to their dedicated pluralities; every other value is treated as "Many".
    auto preferred_plurality = NumberFormat::Plurality::Many;
    if (number == 0)
        preferred_plurality = NumberFormat::Plurality::Zero;
    else if (number == 1)
        preferred_plurality = NumberFormat::Plurality::One;
    else if (number == 2)
        preferred_plurality = NumberFormat::Plurality::Two;

    if (auto preferred = lookup(preferred_plurality); preferred.has_value())
        return preferred;

    return lookup(NumberFormat::Plurality::Other);
}
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
//
// Substitutes `currency_display` for the "{currency}" placeholder of `base_pattern`.
//
// A No-Break Space is added on the side of the currency facing the number when both hold:
//   * the pattern text preceding the later of the two placeholders does not already end with NBSP;
//   * the currency code point adjacent to the number (first code point when the number comes
//     first, last code point otherwise) is not in the "Symbol" general category.
//
// `base_pattern` must contain both "{number}" and "{currency}"; this is VERIFY'd.
// An empty `currency_display` is substituted as-is, without any spacing.
String create_currency_format_pattern(StringView currency_display, StringView base_pattern)
{
    constexpr auto number_key = "{number}"sv;
    constexpr auto currency_key = "{currency}"sv;
    constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)

    auto number_index = base_pattern.find(number_key);
    VERIFY(number_index.has_value());

    auto currency_index = base_pattern.find(currency_key);
    VERIFY(currency_index.has_value());

    static auto symbol_category = general_category_from_string("Symbol"sv);
    VERIFY(symbol_category.has_value()); // This shouldn't be reached if Unicode generation is disabled.

    // An empty display string has no code point next to the number, so there is nothing to
    // inspect and nothing to separate. Bail out before dereferencing an empty Utf8View below.
    if (currency_display.is_empty())
        return base_pattern.replace(currency_key, currency_display);

    Utf8View utf8_currency_display { currency_display };
    Optional<String> currency_display_with_spacing;

    if (*number_index < *currency_index) {
        // Number first: the currency's first code point faces the number.
        if (!base_pattern.substring_view(0, *currency_index).ends_with(spacing)) {
            u32 first_currency_code_point = *utf8_currency_display.begin();

            if (!code_point_has_general_category(first_currency_code_point, *symbol_category))
                currency_display_with_spacing = String::formatted("{}{}", spacing, currency_display);
        }
    } else {
        // Currency first: the currency's last code point faces the number.
        if (!base_pattern.substring_view(0, *number_index).ends_with(spacing)) {
            u32 last_currency_code_point = 0;

            for (auto code_point : utf8_currency_display)
                last_currency_code_point = code_point;

            if (!code_point_has_general_category(last_currency_code_point, *symbol_category))
                currency_display_with_spacing = String::formatted("{}{}", currency_display, spacing);
        }
    }

    if (currency_display_with_spacing.has_value())
        return base_pattern.replace(currency_key, *currency_display_with_spacing);

    return base_pattern.replace(currency_key, currency_display);
}
String LanguageID::to_string() const
{
StringBuilder builder;

View file

@ -195,4 +195,7 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id);
Optional<LanguageID> remove_likely_subtags(LanguageID const& language_id);
String resolve_most_likely_territory(LanguageID const& language_id, StringView territory_alias);
// Looks up the CurrencyUnit number formats for the given locale and numbering system and returns
// the one whose plurality matches number (0 -> Zero, 1 -> One, 2 -> Two, anything else -> Many).
// Falls back to the Other plurality; the result is empty if that is missing as well.
Optional<NumberFormat> select_currency_unit_pattern(StringView locale, StringView system, double number);
// Replaces the "{currency}" placeholder of base_pattern with currency_display, possibly padded with
// a No-Break Space on the side facing the "{number}" placeholder. base_pattern must contain both
// placeholders.
String create_currency_format_pattern(StringView currency_display, StringView base_pattern);
}