mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 18:44:59 +00:00
LibJS+LibUnicode: Support multiple identifiers within format pattern
This wasn't the case for compact patterns, but unit patterns can contain multiple (up to 2, really) identifiers that must each be recognized by LibJS. Each generated NumberFormat object now stores an array of identifiers parsed. The format pattern itself is encoded with the index into this array for that identifier, e.g. the compact format string "0K" will become "{number}{compactIdentifier:0}".
This commit is contained in:
parent
3b68370212
commit
04b8b87c17
3 changed files with 72 additions and 40 deletions
|
@ -57,7 +57,7 @@ struct NumberFormat : public Unicode::NumberFormat {
|
|||
StringIndexType zero_format_index { 0 };
|
||||
StringIndexType positive_format_index { 0 };
|
||||
StringIndexType negative_format_index { 0 };
|
||||
StringIndexType identifier_index { 0 };
|
||||
Vector<StringIndexType> identifier_indices {};
|
||||
};
|
||||
|
||||
struct NumberSystem {
|
||||
|
@ -88,46 +88,61 @@ struct UnicodeLocaleData {
|
|||
UniqueStringStorage<StringIndexType> unique_strings;
|
||||
HashMap<String, Locale> locales;
|
||||
Vector<String> numeric_symbols;
|
||||
size_t max_identifier_count { 0 };
|
||||
};
|
||||
|
||||
static String parse_identifier(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format)
|
||||
static String parse_identifiers(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format)
|
||||
{
|
||||
static Utf8View whitespace { "\u0020\u00a0"sv };
|
||||
|
||||
Utf8View utf8_pattern { pattern };
|
||||
Optional<size_t> start_index;
|
||||
Optional<size_t> end_index;
|
||||
bool inside_replacement = false;
|
||||
while (true) {
|
||||
Utf8View utf8_pattern { pattern };
|
||||
Optional<size_t> start_index;
|
||||
Optional<size_t> end_index;
|
||||
bool inside_replacement = false;
|
||||
|
||||
for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) {
|
||||
if (*it == '{') {
|
||||
if (start_index.has_value()) {
|
||||
end_index = utf8_pattern.byte_offset_of(it);
|
||||
break;
|
||||
for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) {
|
||||
if (*it == '{') {
|
||||
if (start_index.has_value()) {
|
||||
end_index = utf8_pattern.byte_offset_of(it);
|
||||
break;
|
||||
}
|
||||
|
||||
inside_replacement = true;
|
||||
} else if (*it == '}') {
|
||||
inside_replacement = false;
|
||||
} else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) {
|
||||
start_index = utf8_pattern.byte_offset_of(it);
|
||||
}
|
||||
|
||||
inside_replacement = true;
|
||||
} else if (*it == '}') {
|
||||
inside_replacement = false;
|
||||
} else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) {
|
||||
start_index = utf8_pattern.byte_offset_of(it);
|
||||
}
|
||||
|
||||
if (!start_index.has_value())
|
||||
return pattern;
|
||||
|
||||
end_index = end_index.value_or(pattern.length());
|
||||
|
||||
utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index);
|
||||
utf8_pattern = utf8_pattern.trim(whitespace);
|
||||
|
||||
auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv);
|
||||
auto identifier_index = locale_data.unique_strings.ensure(move(identifier));
|
||||
size_t replacement_index = 0;
|
||||
|
||||
if (auto index = format.identifier_indices.find_first_index(identifier_index); index.has_value()) {
|
||||
replacement_index = *index;
|
||||
} else {
|
||||
replacement_index = format.identifier_indices.size();
|
||||
format.identifier_indices.append(identifier_index);
|
||||
|
||||
locale_data.max_identifier_count = max(locale_data.max_identifier_count, format.identifier_indices.size());
|
||||
}
|
||||
|
||||
pattern = String::formatted("{}{{{}:{}}}{}",
|
||||
*start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv,
|
||||
replacement,
|
||||
replacement_index,
|
||||
pattern.substring_view(*start_index + utf8_pattern.byte_length()));
|
||||
}
|
||||
|
||||
if (!start_index.has_value())
|
||||
return pattern;
|
||||
end_index = end_index.value_or(pattern.length());
|
||||
|
||||
utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index);
|
||||
utf8_pattern = utf8_pattern.trim(whitespace);
|
||||
|
||||
auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv);
|
||||
format.identifier_index = locale_data.unique_strings.ensure(move(identifier));
|
||||
|
||||
return String::formatted("{}{}{}",
|
||||
*start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv,
|
||||
replacement,
|
||||
pattern.substring_view(*start_index + utf8_pattern.byte_length()));
|
||||
}
|
||||
|
||||
static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr)
|
||||
|
@ -188,7 +203,7 @@ static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& loc
|
|||
}
|
||||
|
||||
if (type == NumberFormatType::Compact)
|
||||
return parse_identifier(move(pattern), "{compactIdentifier}"sv, locale_data, format);
|
||||
return parse_identifiers(move(pattern), "compactIdentifier"sv, locale_data, format);
|
||||
|
||||
return pattern;
|
||||
};
|
||||
|
@ -413,6 +428,7 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
|
|||
SourceGenerator generator { builder };
|
||||
generator.set("string_index_type"sv, s_string_index_type);
|
||||
generator.set("numeric_symbols_size", String::number(locale_data.numeric_symbols.size()));
|
||||
generator.set("identifier_count", String::number(locale_data.max_identifier_count));
|
||||
|
||||
generator.append(R"~~~(
|
||||
#include <AK/Array.h>
|
||||
|
@ -437,7 +453,10 @@ struct NumberFormat {
|
|||
number_format.zero_format = s_string_list[zero_format];
|
||||
number_format.positive_format = s_string_list[positive_format];
|
||||
number_format.negative_format = s_string_list[negative_format];
|
||||
number_format.identifier = s_string_list[identifier];
|
||||
|
||||
number_format.identifiers.ensure_capacity(identifiers.size());
|
||||
for (@string_index_type@ identifier : identifiers)
|
||||
number_format.identifiers.append(s_string_list[identifier]);
|
||||
|
||||
return number_format;
|
||||
}
|
||||
|
@ -448,7 +467,7 @@ struct NumberFormat {
|
|||
@string_index_type@ zero_format { 0 };
|
||||
@string_index_type@ positive_format { 0 };
|
||||
@string_index_type@ negative_format { 0 };
|
||||
@string_index_type@ identifier { 0 };
|
||||
Array<@string_index_type@, @identifier_count@> identifiers {};
|
||||
};
|
||||
|
||||
struct NumberSystem {
|
||||
|
@ -479,8 +498,18 @@ struct NumberSystem {
|
|||
generator.set("zero_format"sv, String::number(number_format.zero_format_index));
|
||||
generator.set("positive_format"sv, String::number(number_format.positive_format_index));
|
||||
generator.set("negative_format"sv, String::number(number_format.negative_format_index));
|
||||
generator.set("identifier"sv, String::number(number_format.identifier_index));
|
||||
generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, @identifier@ },");
|
||||
generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, { ");
|
||||
|
||||
bool first = true;
|
||||
for (auto identifier_index : number_format.identifier_indices) {
|
||||
if (!first)
|
||||
generator.append(", ");
|
||||
|
||||
generator.append(String::number(identifier_index));
|
||||
first = false;
|
||||
}
|
||||
|
||||
generator.append(" } },");
|
||||
};
|
||||
|
||||
auto append_number_formats = [&](String name, auto const& number_formats) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue