mirror of
https://github.com/RGBCube/serenity
synced 2025-06-01 10:28:13 +00:00
LibJS+LibUnicode: Fully implement currency number formatting
Currencies are a bit strange; the layout of currency data in the CLDR is not particularly compatible with what ECMA-402 expects. For example, the currency formats in the "en" and "ar" locales for the Latin script are: en: "¤#,##0.00" ar: "¤\u00A0#,##0.00" Note how the "ar" locale has a non-breaking space after the currency symbol (¤), but "en" does not. This does not mean that this space will appear in the "ar"-formatted string, nor does it mean that a space won't appear in the "en"-formatted string. This is a runtime decision based on the currency display chosen by the user ("$" vs. "USD" vs. "US dollar") and other rules in the Unicode TR-35 spec. ECMA-402 shies away from the nuances here with "implementation-defined" steps. LibUnicode will store the data parsed from the CLDR however it is presented; making decisions about spacing, etc. will occur at runtime based on user input.
This commit is contained in:
parent
e9493a2cd5
commit
a701ed52fc
6 changed files with 454 additions and 30 deletions
|
@ -8,6 +8,8 @@
|
|||
#include <AK/GenericLexer.h>
|
||||
#include <AK/QuickSort.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibUnicode/CharacterTypes.h>
|
||||
#include <LibUnicode/Locale.h>
|
||||
|
||||
#if ENABLE_UNICODE_DATA
|
||||
|
@ -963,6 +965,79 @@ String resolve_most_likely_territory([[maybe_unused]] LanguageID const& language
|
|||
return aliases[0].to_string();
|
||||
}
|
||||
|
||||
// Picks the currency-unit number format to use for `number` in the given locale and numbering system.
//
// The plural category is chosen by exact comparison only: 0 maps to Zero, 1 to One, 2 to Two, and
// every other value maps to Many. When the available formats contain no entry for that category,
// the entry for the Other category is returned instead (or an empty Optional if that is absent too).
//
// FIXME: This is a rather naive and locale-unaware implementation of Unicode's TR-35 pluralization
//        rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
//        Once those rules are implemented for LibJS, we better use them instead.
Optional<NumberFormat> select_currency_unit_pattern(StringView locale, StringView system, double number)
{
    auto candidates = get_compact_number_system_formats(locale, system, CompactNumberFormatType::CurrencyUnit);

    // Returns the first candidate whose plurality equals `wanted`, if there is one.
    auto lookup = [&](auto wanted) -> Optional<NumberFormat> {
        auto match = candidates.find_if([&](auto& candidate) { return candidate.plurality == wanted; });
        if (match != candidates.end())
            return *match;
        return {};
    };

    // Decide which plural category we would like to use for this number.
    auto preferred = NumberFormat::Plurality::Many;
    if (number == 0)
        preferred = NumberFormat::Plurality::Zero;
    else if (number == 1)
        preferred = NumberFormat::Plurality::One;
    else if (number == 2)
        preferred = NumberFormat::Plurality::Two;

    if (auto exact = lookup(preferred); exact.has_value())
        return exact;

    // No format for the preferred category; fall back to the catch-all category.
    return lookup(NumberFormat::Plurality::Other);
}
|
||||
|
||||
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
|
||||
String create_currency_format_pattern(StringView currency_display, StringView base_pattern)
|
||||
{
|
||||
constexpr auto number_key = "{number}"sv;
|
||||
constexpr auto currency_key = "{currency}"sv;
|
||||
constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
|
||||
|
||||
auto number_index = base_pattern.find(number_key);
|
||||
VERIFY(number_index.has_value());
|
||||
|
||||
auto currency_index = base_pattern.find(currency_key);
|
||||
VERIFY(currency_index.has_value());
|
||||
|
||||
static auto symbol_category = general_category_from_string("Symbol"sv);
|
||||
VERIFY(symbol_category.has_value()); // This shouldn't be reached if Unicode generation is disabled.
|
||||
|
||||
Utf8View utf8_currency_display { currency_display };
|
||||
Optional<String> currency_display_with_spacing;
|
||||
|
||||
if (*number_index < *currency_index) {
|
||||
if (!base_pattern.substring_view(0, *currency_index).ends_with(spacing)) {
|
||||
u32 first_currency_code_point = *utf8_currency_display.begin();
|
||||
|
||||
if (!code_point_has_general_category(first_currency_code_point, *symbol_category))
|
||||
currency_display_with_spacing = String::formatted("{}{}", spacing, currency_display);
|
||||
}
|
||||
} else {
|
||||
if (!base_pattern.substring_view(0, *number_index).ends_with(spacing)) {
|
||||
u32 last_currency_code_point = 0;
|
||||
for (auto it = utf8_currency_display.begin(); it != utf8_currency_display.end(); ++it)
|
||||
last_currency_code_point = *it;
|
||||
|
||||
if (!code_point_has_general_category(last_currency_code_point, *symbol_category))
|
||||
currency_display_with_spacing = String::formatted("{}{}", currency_display, spacing);
|
||||
}
|
||||
}
|
||||
|
||||
if (currency_display_with_spacing.has_value())
|
||||
return base_pattern.replace(currency_key, *currency_display_with_spacing);
|
||||
|
||||
return base_pattern.replace(currency_key, currency_display);
|
||||
}
|
||||
|
||||
String LanguageID::to_string() const
|
||||
{
|
||||
StringBuilder builder;
|
||||
|
|
|
@ -195,4 +195,7 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id);
|
|||
Optional<LanguageID> remove_likely_subtags(LanguageID const& language_id);
|
||||
String resolve_most_likely_territory(LanguageID const& language_id, StringView territory_alias);
|
||||
|
||||
// Returns the currency-unit number format for `number` in the given locale and numbering system.
// The plural category is chosen naively (0 => Zero, 1 => One, 2 => Two, anything else => Many),
// falling back to the Other category; the result is empty if no suitable format is found.
Optional<NumberFormat> select_currency_unit_pattern(StringView locale, StringView system, double number);
|
||||
// Replaces the "{currency}" placeholder in `base_pattern` with `currency_display`. A No-Break Space
// is added between the currency and the number when the pattern does not already have one there and
// the currency code point facing the number is not in the "Symbol" general category.
// `base_pattern` must contain both the "{number}" and "{currency}" placeholders.
String create_currency_format_pattern(StringView currency_display, StringView base_pattern);
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue