From 2a7f36b392479dbb41bb3fac6512c9b176c175ec Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sat, 11 Dec 2021 00:37:34 -0500 Subject: [PATCH] LibJS+LibUnicode: Generate unique numeric symbol lists There are 443 number system objects generated, each of which held an array of number system symbols. Of those 443 arrays, only 39 are unique. To uniquely store these, this change moves the generated NumericSymbol enumeration to the public LibUnicode/NumberFormat.h header with a pre-defined set of symbols that we need. This is to ensure the generated, unique arrays are created in a known order with known symbols. While it is unfortunate to no longer discover these symbols at generation time, it does allow us to ignore unwanted symbols and perform fewer string-to-enumeration conversions at lookup time. --- .../GenerateUnicodeNumberFormat.cpp | 110 +++++++++--------- .../LibJS/Runtime/Intl/DateTimeFormat.cpp | 2 +- .../LibJS/Runtime/Intl/NumberFormat.cpp | 18 +-- .../Libraries/LibUnicode/NumberFormat.cpp | 2 +- Userland/Libraries/LibUnicode/NumberFormat.h | 13 ++- 5 files changed, 79 insertions(+), 66 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp index b5af991636..9b40a82bd4 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp @@ -38,6 +38,9 @@ constexpr auto s_number_format_index_type = "u16"sv; using NumberFormatListIndexType = u16; constexpr auto s_number_format_list_index_type = "u16"sv; +using NumericSymbolListIndexType = u8; +constexpr auto s_numeric_symbol_list_index_type = "u8"sv; + enum class NumberFormatType { Standard, Compact, @@ -135,9 +138,11 @@ struct AK::Traits : public GenericTraits { } }; +using NumericSymbolList = Vector; + struct NumberSystem { StringIndexType system { 0 }; - HashMap symbols {}; + 
NumericSymbolListIndexType symbols { 0 }; u8 primary_grouping_size { 0 }; u8 secondary_grouping_size { 0 }; @@ -171,9 +176,9 @@ struct UnicodeLocaleData { UniqueStringStorage unique_strings; UniqueStorage unique_formats; UniqueStorage unique_format_lists; + UniqueStorage unique_symbols; HashMap locales; - Vector numeric_symbols; size_t max_identifier_count { 0 }; }; @@ -370,6 +375,26 @@ static ErrorOr parse_number_systems(String locale_numbers_path, UnicodeLoc return locale_data.unique_format_lists.ensure(move(result)); }; + auto numeric_symbol_from_string = [&](StringView numeric_symbol) -> Optional { + if (numeric_symbol == "decimal"sv) + return Unicode::NumericSymbol::Decimal; + if (numeric_symbol == "exponential"sv) + return Unicode::NumericSymbol::Exponential; + if (numeric_symbol == "group"sv) + return Unicode::NumericSymbol::Group; + if (numeric_symbol == "infinity"sv) + return Unicode::NumericSymbol::Infinity; + if (numeric_symbol == "minusSign"sv) + return Unicode::NumericSymbol::MinusSign; + if (numeric_symbol == "nan"sv) + return Unicode::NumericSymbol::NaN; + if (numeric_symbol == "percentSign"sv) + return Unicode::NumericSymbol::PercentSign; + if (numeric_symbol == "plusSign"sv) + return Unicode::NumericSymbol::PlusSign; + return {}; + }; + locale_numbers_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { constexpr auto symbols_prefix = "symbols-numberSystem-"sv; constexpr auto decimal_formats_prefix = "decimalFormats-numberSystem-"sv; @@ -381,13 +406,21 @@ static ErrorOr parse_number_systems(String locale_numbers_path, UnicodeLoc auto system = key.substring(symbols_prefix.length()); auto& number_system = ensure_number_system(system); - value.as_object().for_each_member([&](auto const& symbol, JsonValue const& localization) { - auto symbol_index = locale_data.unique_strings.ensure(localization.as_string()); - number_system.symbols.set(symbol, symbol_index); + NumericSymbolList symbols; - if 
(!locale_data.numeric_symbols.contains_slow(symbol)) - locale_data.numeric_symbols.append(symbol); + value.as_object().for_each_member([&](auto const& symbol, JsonValue const& localization) { + auto numeric_symbol = numeric_symbol_from_string(symbol); + if (!numeric_symbol.has_value()) + return; + + if (to_underlying(*numeric_symbol) >= symbols.size()) + symbols.resize(to_underlying(*numeric_symbol) + 1); + + auto symbol_index = locale_data.unique_strings.ensure(localization.as_string()); + symbols[to_underlying(*numeric_symbol)] = symbol_index; }); + + number_system.symbols = locale_data.unique_symbols.ensure(move(symbols)); } else if (key.starts_with(decimal_formats_prefix)) { auto system = key.substring(decimal_formats_prefix.length()); auto& number_system = ensure_number_system(system); @@ -571,18 +604,7 @@ static ErrorOr parse_all_locales(String numbers_path, String units_path, U return {}; } -static String format_identifier(StringView owner, String identifier) -{ - identifier = identifier.replace("-"sv, "_"sv, true); - - if (all_of(identifier, is_ascii_digit)) - return String::formatted("{}_{}", owner[0], identifier); - if (is_ascii_lower_alpha(identifier[0])) - return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1)); - return identifier; -} - -static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data) +static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData&) { StringBuilder builder; SourceGenerator generator { builder }; @@ -592,19 +614,16 @@ static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& #include #include -#include #include #include namespace Unicode { )~~~"); - generate_enum(generator, format_identifier, "NumericSymbol"sv, {}, locale_data.numeric_symbols); - generator.append(R"~~~( namespace Detail { -Optional get_number_system_symbol(StringView locale, StringView system, StringView numeric_symbol); +Optional 
get_number_system_symbol(StringView locale, StringView system, Unicode::NumericSymbol symbol); Optional get_number_system_groupings(StringView locale, StringView system); Optional get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type); Vector get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type); @@ -626,7 +645,7 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca generator.set("string_index_type"sv, s_string_index_type); generator.set("number_format_index_type"sv, s_number_format_index_type); generator.set("number_format_list_index_type"sv, s_number_format_list_index_type); - generator.set("numeric_symbols_size", String::number(locale_data.numeric_symbols.size())); + generator.set("numeric_symbol_list_index_type"sv, s_numeric_symbol_list_index_type); generator.set("identifier_count", String::number(locale_data.max_identifier_count)); generator.append(R"~~~( @@ -672,7 +691,7 @@ struct NumberFormat { struct NumberSystem { @string_index_type@ system { 0 }; - Array<@string_index_type@, @numeric_symbols_size@> symbols {}; + @numeric_symbol_list_index_type@ symbols { 0 }; u8 primary_grouping_size { 0 }; u8 secondary_grouping_size { 0 }; @@ -700,6 +719,7 @@ struct Unit { locale_data.unique_formats.generate(generator, "NumberFormat"sv, "s_number_formats"sv, 10); locale_data.unique_format_lists.generate(generator, s_number_format_index_type, "s_number_format_lists"sv); + locale_data.unique_symbols.generate(generator, s_string_index_type, "s_numeric_symbol_lists"sv); auto append_number_systems = [&](String name, auto const& number_systems) { generator.set("name", name); @@ -710,6 +730,7 @@ static constexpr Array @name@ { {)~~~"); for (auto const& number_system : number_systems) { generator.set("system"sv, String::number(number_system.value.system)); + generator.set("symbols"sv, String::number(number_system.value.symbols)); 
generator.set("primary_grouping_size"sv, String::number(number_system.value.primary_grouping_size)); generator.set("secondary_grouping_size"sv, String::number(number_system.value.secondary_grouping_size)); generator.set("decimal_format", String::number(number_system.value.decimal_format)); @@ -722,16 +743,8 @@ static constexpr Array @name@ { {)~~~"); generator.set("percent_format", String::number(number_system.value.percent_format)); generator.set("scientific_format", String::number(number_system.value.scientific_format)); - generator.append(R"~~~( - { @system@, {)~~~"); - - for (auto const& symbol : locale_data.numeric_symbols) { - auto index = number_system.value.symbols.get(symbol).value_or(0); - generator.set("index", String::number(index)); - generator.append(" @index@,"); - } - - generator.append(" }, @primary_grouping_size@, @secondary_grouping_size@, "); + generator.append("\n { "); + generator.append("@system@, @symbols@, @primary_grouping_size@, @secondary_grouping_size@, "); generator.append("@decimal_format@, @decimal_long_formats@, @decimal_short_formats@, "); generator.append("@currency_format@, @accounting_format@, @currency_unit_formats@, @currency_short_formats@, "); generator.append("@percent_format@, @scientific_format@ },"); @@ -767,18 +780,6 @@ static constexpr Array @name@ { {)~~~"); generate_mapping(generator, locale_data.locales, "NumberSystem"sv, "s_number_systems"sv, "s_number_systems_{}", [&](auto const& name, auto const& value) { append_number_systems(name, value.number_systems); }); generate_mapping(generator, locale_data.locales, "Unit"sv, "s_units"sv, "s_units_{}", [&](auto const& name, auto const& value) { append_units(name, value.units); }); - auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values) { - HashValueMap hashes; - hashes.ensure_capacity(values.size()); - - for (auto const& value : values) - hashes.set(value.hash(), format_identifier(enum_title, value)); - - 
generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes)); - }; - - append_from_string("NumericSymbol"sv, "numeric_symbol"sv, locale_data.numeric_symbols); - generator.append(R"~~~( static NumberSystem const* find_number_system(StringView locale, StringView system) { @@ -797,15 +798,16 @@ static NumberSystem const* find_number_system(StringView locale, StringView syst return nullptr; } -Optional get_number_system_symbol(StringView locale, StringView system, StringView symbol) +Optional get_number_system_symbol(StringView locale, StringView system, Unicode::NumericSymbol symbol) { - auto symbol_value = numeric_symbol_from_string(symbol); - if (!symbol_value.has_value()) - return {}; - if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) { - auto symbol_index = to_underlying(*symbol_value); - return s_string_list[number_system->symbols[symbol_index]]; + auto symbols = s_numeric_symbol_lists.at(number_system->symbols); + + auto symbol_index = to_underlying(symbol); + if (symbol_index >= symbols.size()) + return {}; + + return s_string_list[symbols[symbol_index]]; } return {}; diff --git a/Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp index 705388477b..b870464eba 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp @@ -1021,7 +1021,7 @@ ThrowCompletionOr> format_date_time_pattern(GlobalObjec // Non-standard, TR-35 requires the decimal separator before injected {fractionalSecondDigits} partitions // to adhere to the selected locale. This depends on other generated data, so it is deferred to here. 
else if (part == "decimal"sv) { - auto decimal_symbol = Unicode::get_number_system_symbol(data_locale, date_time_format.numbering_system(), "decimal"sv).value_or("."sv); + auto decimal_symbol = Unicode::get_number_system_symbol(data_locale, date_time_format.numbering_system(), Unicode::NumericSymbol::Decimal).value_or("."sv); result.append({ "literal"sv, decimal_symbol }); } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp index 2de65ec445..84ba644235 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp @@ -605,12 +605,12 @@ Vector partition_number_pattern(NumberFormat& number_format, d // 2. If x is NaN, then if (Value(number).is_nan()) { // a. Let n be an implementation- and locale-dependent (ILD) String value indicating the NaN value. - formatted_string = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "nan"sv).value_or("NaN"sv); + formatted_string = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::NaN).value_or("NaN"sv); } // 3. Else if x is a non-finite Number, then else if (!Value(number).is_finite_number()) { // a. Let n be an ILD String value indicating infinity. - formatted_string = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "infinity"sv).value_or("infinity"sv); + formatted_string = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::Infinity).value_or("infinity"sv); } // 4. Else, else { @@ -669,7 +669,7 @@ Vector partition_number_pattern(NumberFormat& number_format, d // d. Else if p is equal to "plusSign", then else if (part == "plusSign"sv) { // i. Let plusSignSymbol be the ILND String representing the plus sign. 
- auto plus_sign_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "plusSign"sv).value_or("+"sv); + auto plus_sign_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::PlusSign).value_or("+"sv); // ii. Append a new Record { [[Type]]: "plusSign", [[Value]]: plusSignSymbol } as the last element of result. result.append({ "plusSign"sv, plus_sign_symbol }); } @@ -677,7 +677,7 @@ Vector partition_number_pattern(NumberFormat& number_format, d // e. Else if p is equal to "minusSign", then else if (part == "minusSign"sv) { // i. Let minusSignSymbol be the ILND String representing the minus sign. - auto minus_sign_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "minusSign"sv).value_or("-"sv); + auto minus_sign_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::MinusSign).value_or("-"sv); // ii. Append a new Record { [[Type]]: "minusSign", [[Value]]: minusSignSymbol } as the last element of result. result.append({ "minusSign"sv, minus_sign_symbol }); } @@ -685,7 +685,7 @@ Vector partition_number_pattern(NumberFormat& number_format, d // f. Else if p is equal to "percentSign" and numberFormat.[[Style]] is "percent", then else if ((part == "percentSign"sv) && (number_format.style() == NumberFormat::Style::Percent)) { // i. Let percentSignSymbol be the ILND String representing the percent sign. - auto percent_sign_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "percentSign"sv).value_or("%"sv); + auto percent_sign_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::PercentSign).value_or("%"sv); // ii. 
Append a new Record { [[Type]]: "percentSign", [[Value]]: percentSignSymbol } as the last element of result. result.append({ "percentSign"sv, percent_sign_symbol }); } @@ -937,7 +937,7 @@ Vector partition_notation_sub_pattern(NumberFormat& number_for // 6. If the numberFormat.[[UseGrouping]] is true, then if (use_grouping) { // a. Let groupSepSymbol be the implementation-, locale-, and numbering system-dependent (ILND) String representing the grouping separator. - auto group_sep_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "group"sv).value_or(","sv); + auto group_sep_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::Group).value_or(","sv); // b. Let groups be a List whose elements are, in left to right order, the substrings defined by ILND set of locations within the integer. auto groups = separate_integer_into_groups(*grouping_sizes, integer); @@ -969,7 +969,7 @@ Vector partition_notation_sub_pattern(NumberFormat& number_for // 8. If fraction is not undefined, then if (fraction.has_value()) { // a. Let decimalSepSymbol be the ILND String representing the decimal separator. - auto decimal_sep_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "decimal"sv).value_or("."sv); + auto decimal_sep_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::Decimal).value_or("."sv); // b. Append a new Record { [[Type]]: "decimal", [[Value]]: decimalSepSymbol } as the last element of result. result.append({ "decimal"sv, decimal_sep_symbol }); // c. Append a new Record { [[Type]]: "fraction", [[Value]]: fraction } as the last element of result. @@ -993,7 +993,7 @@ Vector partition_notation_sub_pattern(NumberFormat& number_for // vi. 
Else if p is equal to "scientificSeparator", then else if (part == "scientificSeparator"sv) { // 1. Let scientificSeparator be the ILND String representing the exponent separator. - auto scientific_separator = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "exponential"sv).value_or("E"sv); + auto scientific_separator = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::Exponential).value_or("E"sv); // 2. Append a new Record { [[Type]]: "exponentSeparator", [[Value]]: scientificSeparator } as the last element of result. result.append({ "exponentSeparator"sv, scientific_separator }); } @@ -1002,7 +1002,7 @@ Vector partition_notation_sub_pattern(NumberFormat& number_for // 1. If exponent < 0, then if (exponent < 0) { // a. Let minusSignSymbol be the ILND String representing the minus sign. - auto minus_sign_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), "minusSign"sv).value_or("-"sv); + auto minus_sign_symbol = Unicode::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), Unicode::NumericSymbol::MinusSign).value_or("-"sv); // b. Append a new Record { [[Type]]: "exponentMinusSign", [[Value]]: minusSignSymbol } as the last element of result. 
result.append({ "exponentMinusSign"sv, minus_sign_symbol }); diff --git a/Userland/Libraries/LibUnicode/NumberFormat.cpp b/Userland/Libraries/LibUnicode/NumberFormat.cpp index cffd0d4c04..7933dd005c 100644 --- a/Userland/Libraries/LibUnicode/NumberFormat.cpp +++ b/Userland/Libraries/LibUnicode/NumberFormat.cpp @@ -16,7 +16,7 @@ namespace Unicode { -Optional get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol) +Optional get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] NumericSymbol symbol) { #if ENABLE_UNICODE_DATA return Detail::get_number_system_symbol(locale, system, symbol); diff --git a/Userland/Libraries/LibUnicode/NumberFormat.h b/Userland/Libraries/LibUnicode/NumberFormat.h index b56e870ad7..c08d61097d 100644 --- a/Userland/Libraries/LibUnicode/NumberFormat.h +++ b/Userland/Libraries/LibUnicode/NumberFormat.h @@ -54,7 +54,18 @@ struct NumberFormat { Vector identifiers {}; }; -Optional get_number_system_symbol(StringView locale, StringView system, StringView symbol); +enum class NumericSymbol : u8 { + Decimal, + Exponential, + Group, + Infinity, + MinusSign, + NaN, + PercentSign, + PlusSign, +}; + +Optional get_number_system_symbol(StringView locale, StringView system, NumericSymbol symbol); Optional get_number_system_groupings(StringView locale, StringView system); Optional get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type); Vector get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);