mirror of
https://github.com/RGBCube/serenity
synced 2025-07-23 10:37:41 +00:00
LibUnicode: Implement the range pattern processing algorithm
This algorithm is to inject spacing around the range separator under certain conditions. For example, in en-US, the range [3, 5] should be formatted as "3–5" if unitless, but as "$3 – $5" for currency.
This commit is contained in:
parent
b2709f161e
commit
0a6363d3e9
2 changed files with 45 additions and 10 deletions
|
@ -50,6 +50,17 @@ String replace_digits_for_number_system(StringView system, StringView number)
|
||||||
return builder.build();
|
return builder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static u32 last_code_point(StringView string)
|
||||||
|
{
|
||||||
|
Utf8View utf8_string { string };
|
||||||
|
u32 code_point = 0;
|
||||||
|
|
||||||
|
for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
|
||||||
|
code_point = *it;
|
||||||
|
|
||||||
|
return code_point;
|
||||||
|
}
|
||||||
|
|
||||||
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
|
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
|
||||||
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
|
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
|
||||||
{
|
{
|
||||||
|
@ -67,16 +78,6 @@ Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView cur
|
||||||
Utf8View utf8_currency_display { currency_display };
|
Utf8View utf8_currency_display { currency_display };
|
||||||
Optional<String> currency_key_with_spacing;
|
Optional<String> currency_key_with_spacing;
|
||||||
|
|
||||||
auto last_code_point = [](StringView string) {
|
|
||||||
Utf8View utf8_string { string };
|
|
||||||
u32 code_point = 0;
|
|
||||||
|
|
||||||
for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
|
|
||||||
code_point = *it;
|
|
||||||
|
|
||||||
return code_point;
|
|
||||||
};
|
|
||||||
|
|
||||||
if (*number_index < *currency_index) {
|
if (*number_index < *currency_index) {
|
||||||
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
|
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
|
||||||
|
|
||||||
|
@ -104,4 +105,37 @@ Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView cur
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
|
||||||
|
Optional<String> augment_range_pattern(StringView range_separator, StringView lower, StringView upper)
|
||||||
|
{
|
||||||
|
#if ENABLE_UNICODE_DATA
|
||||||
|
auto range_pattern_with_spacing = [&]() {
|
||||||
|
return String::formatted(" {} ", range_separator);
|
||||||
|
};
|
||||||
|
|
||||||
|
Utf8View utf8_range_separator { range_separator };
|
||||||
|
Utf8View utf8_upper { upper };
|
||||||
|
|
||||||
|
// NOTE: Our implementation does the prescribed checks backwards for simplicity.
|
||||||
|
|
||||||
|
// To determine whether to add spacing, the currently recommended heuristic is:
|
||||||
|
// 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
|
||||||
|
for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) {
|
||||||
|
if (code_point_has_property(*it, Property::White_Space))
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
|
||||||
|
if (auto it = utf8_upper.begin(); it != utf8_upper.end()) {
|
||||||
|
if (!code_point_has_general_category(*it, GeneralCategory::Decimal_Number))
|
||||||
|
return range_pattern_with_spacing();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!code_point_has_general_category(last_code_point(lower), GeneralCategory::Decimal_Number))
|
||||||
|
return range_pattern_with_spacing();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,5 +71,6 @@ Vector<NumberFormat> get_compact_number_system_formats(StringView locale, String
|
||||||
Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
|
Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
|
||||||
|
|
||||||
Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
|
Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
|
||||||
|
Optional<String> augment_range_pattern(StringView range_separator, StringView lower, StringView upper);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue