diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp index de68f3ab01..a1b20e404b 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp @@ -57,7 +57,7 @@ struct NumberFormat : public Unicode::NumberFormat { StringIndexType zero_format_index { 0 }; StringIndexType positive_format_index { 0 }; StringIndexType negative_format_index { 0 }; - StringIndexType identifier_index { 0 }; + Vector identifier_indices {}; }; struct NumberSystem { @@ -88,46 +88,61 @@ struct UnicodeLocaleData { UniqueStringStorage unique_strings; HashMap locales; Vector numeric_symbols; + size_t max_identifier_count { 0 }; }; -static String parse_identifier(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format) +static String parse_identifiers(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format) { static Utf8View whitespace { "\u0020\u00a0"sv }; - Utf8View utf8_pattern { pattern }; - Optional start_index; - Optional end_index; - bool inside_replacement = false; + while (true) { + Utf8View utf8_pattern { pattern }; + Optional start_index; + Optional end_index; + bool inside_replacement = false; - for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) { - if (*it == '{') { - if (start_index.has_value()) { - end_index = utf8_pattern.byte_offset_of(it); - break; + for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) { + if (*it == '{') { + if (start_index.has_value()) { + end_index = utf8_pattern.byte_offset_of(it); + break; + } + + inside_replacement = true; + } else if (*it == '}') { + inside_replacement = false; + } else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) { + start_index = utf8_pattern.byte_offset_of(it); } - - inside_replacement = true; - } else if (*it == '}') { - inside_replacement = false; - } else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) { - start_index = utf8_pattern.byte_offset_of(it); } + + if (!start_index.has_value()) + return pattern; + + end_index = end_index.value_or(pattern.length()); + + utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index); + utf8_pattern = utf8_pattern.trim(whitespace); + + auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv); + auto identifier_index = locale_data.unique_strings.ensure(move(identifier)); + size_t replacement_index = 0; + + if (auto index = format.identifier_indices.find_first_index(identifier_index); index.has_value()) { + replacement_index = *index; + } else { + replacement_index = format.identifier_indices.size(); + format.identifier_indices.append(identifier_index); + + locale_data.max_identifier_count = max(locale_data.max_identifier_count, format.identifier_indices.size()); + } + + pattern = String::formatted("{}{{{}:{}}}{}", + *start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv, + replacement, + replacement_index, + pattern.substring_view(*start_index + utf8_pattern.byte_length())); } - - if (!start_index.has_value()) - return pattern; - end_index = end_index.value_or(pattern.length()); - - utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index); - utf8_pattern = utf8_pattern.trim(whitespace); - - auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv); - format.identifier_index = locale_data.unique_strings.ensure(move(identifier)); - - return String::formatted("{}{}{}", - *start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv, - replacement, - pattern.substring_view(*start_index + utf8_pattern.byte_length())); } static void parse_number_pattern(Vector patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr) @@ -188,7 +203,7 @@ static void parse_number_pattern(Vector patterns, UnicodeLocaleData& loc } if (type == NumberFormatType::Compact) - return parse_identifier(move(pattern), "{compactIdentifier}"sv, locale_data, format); + return parse_identifiers(move(pattern), "compactIdentifier"sv, locale_data, format); return pattern; }; @@ -413,6 +428,7 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca SourceGenerator generator { builder }; generator.set("string_index_type"sv, s_string_index_type); generator.set("numeric_symbols_size", String::number(locale_data.numeric_symbols.size())); + generator.set("identifier_count", String::number(locale_data.max_identifier_count)); generator.append(R"~~~( #include @@ -437,7 +453,10 @@ struct NumberFormat { number_format.zero_format = s_string_list[zero_format]; number_format.positive_format = s_string_list[positive_format]; number_format.negative_format = s_string_list[negative_format]; - number_format.identifier = s_string_list[identifier]; + + number_format.identifiers.ensure_capacity(identifiers.size()); + for (@string_index_type@ identifier : identifiers) + number_format.identifiers.append(s_string_list[identifier]); return number_format; } @@ -448,7 +467,7 @@ struct NumberFormat { @string_index_type@ zero_format { 0 }; @string_index_type@ positive_format { 0 }; @string_index_type@ negative_format { 0 }; - @string_index_type@ identifier { 0 }; + Array<@string_index_type@, @identifier_count@> identifiers {}; }; struct NumberSystem { @@ -479,8 +498,18 @@ struct NumberSystem { generator.set("zero_format"sv, String::number(number_format.zero_format_index)); generator.set("positive_format"sv, String::number(number_format.positive_format_index)); generator.set("negative_format"sv, String::number(number_format.negative_format_index)); - generator.set("identifier"sv, String::number(number_format.identifier_index)); - generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, @identifier@ },"); + generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, { "); + + bool first = true; + for (auto identifier_index : number_format.identifier_indices) { + if (!first) + generator.append(", "); + + generator.append(String::number(identifier_index)); + first = false; + } + + generator.append(" } },"); }; auto append_number_formats = [&](String name, auto const& number_formats) { diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp index f2bfecefd9..9373105fa4 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp @@ -962,11 +962,14 @@ Vector partition_notation_sub_pattern(NumberFormat& number_for } // iv. Else if p is equal to "compactSymbol", then // v. Else if p is equal to "compactName", then - else if (part == "compactIdentifier"sv) { + else if (part.starts_with("compactIdentifier:"sv)) { // Note: Our implementation combines "compactSymbol" and "compactName" into one field, "compactIdentifier". + auto identifier_index = part.substring_view("compactIdentifier:"sv.length()).to_uint(); + VERIFY(identifier_index.has_value()); + // 1. Let compactSymbol be an ILD string representing exponent in short form, which may depend on x in languages having different plural forms. The implementation must be able to provide this string, or else the pattern would not have a "{compactSymbol}" placeholder. - auto compact_identifier = number_format.compact_format().identifier; + auto compact_identifier = number_format.compact_format().identifiers[*identifier_index]; // 2. Append a new Record { [[Type]]: "compact", [[Value]]: compactSymbol } as the last element of result. result.append({ "compact"sv, compact_identifier }); diff --git a/Userland/Libraries/LibUnicode/Locale.h b/Userland/Libraries/LibUnicode/Locale.h index a82856b3e6..0979e4851f 100644 --- a/Userland/Libraries/LibUnicode/Locale.h +++ b/Userland/Libraries/LibUnicode/Locale.h @@ -122,7 +122,7 @@ struct NumberFormat { StringView zero_format {}; StringView positive_format {}; StringView negative_format {}; - StringView identifier {}; + Vector identifiers {}; }; struct ListPatterns {