1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-06-01 02:38:13 +00:00

LibUnicode: Port generator utility methods to ErrorOr

Most of these were VERIFY-ing for success, but propagating an error
message up to serenity_main() is much nicer than just a SIGABRT.
This commit is contained in:
Timothy Flynn 2021-11-23 11:54:53 -05:00 committed by Andreas Kling
parent 7f780e43a6
commit 0aa3e5c2ea
3 changed files with 87 additions and 109 deletions

View file

@ -77,17 +77,21 @@ struct UnicodeLocaleData {
size_t max_variant_size { 0 }; size_t max_variant_size { 0 };
}; };
static Optional<LanguageMapping> parse_language_mapping(UnicodeLocaleData& locale_data, StringView key, StringView alias) // Some parsing is expected to fail. For example, the CLDR contains language mappings
// with locales such as "en-GB-oed" that are canonically invalid locale IDs.
// Unwraps a fallible result inside a function or lambda that returns void.
// `expression` is evaluated exactly once. If the result reports is_error(), the
// enclosing function returns immediately and the error itself is dropped;
// otherwise the macro evaluates to the result's released value.
// Written as a GNU statement expression, `({ ... })`, so the macro can appear on
// the right-hand side of an initialization.
#define TRY_OR_DISCARD(expression) \
({ \
auto _temporary_result = (expression); \
if (_temporary_result.is_error()) \
return; \
_temporary_result.release_value(); \
})
static ErrorOr<LanguageMapping> parse_language_mapping(UnicodeLocaleData& locale_data, StringView key, StringView alias)
{ {
auto parsed_key = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, key); auto parsed_key = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, key));
if (!parsed_key.has_value()) auto parsed_alias = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, alias));
return {}; return LanguageMapping { move(parsed_key), move(parsed_alias) };
auto parsed_alias = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, alias);
if (!parsed_alias.has_value())
return {};
return LanguageMapping { parsed_key.release_value(), parsed_alias.release_value() };
} }
static ErrorOr<void> parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data) static ErrorOr<void> parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data)
@ -107,13 +111,10 @@ static ErrorOr<void> parse_core_aliases(String core_supplemental_path, UnicodeLo
auto alias = value.as_object().get("_replacement"sv).as_string(); auto alias = value.as_object().get("_replacement"sv).as_string();
if (key.contains('-')) { if (key.contains('-')) {
auto mapping = parse_language_mapping(locale_data, key, alias); auto mapping = TRY_OR_DISCARD(parse_language_mapping(locale_data, key, alias));
if (!mapping.has_value()) locale_data.max_variant_size = max(mapping.key.variants.size(), locale_data.max_variant_size);
return; locale_data.max_variant_size = max(mapping.alias.variants.size(), locale_data.max_variant_size);
locale_data.complex_mappings.append(move(mapping));
locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size);
locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size);
locale_data.complex_mappings.append(mapping.release_value());
} else { } else {
alias_map.set(key, locale_data.unique_strings.ensure(alias)); alias_map.set(key, locale_data.unique_strings.ensure(alias));
} }
@ -141,13 +142,10 @@ static ErrorOr<void> parse_likely_subtags(String core_supplemental_path, Unicode
auto const& likely_subtags_object = supplemental_object.as_object().get("likelySubtags"sv); auto const& likely_subtags_object = supplemental_object.as_object().get("likelySubtags"sv);
likely_subtags_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { likely_subtags_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
auto mapping = parse_language_mapping(locale_data, key, value.as_string()); auto mapping = TRY_OR_DISCARD(parse_language_mapping(locale_data, key, value.as_string()));
if (!mapping.has_value()) locale_data.max_variant_size = max(mapping.key.variants.size(), locale_data.max_variant_size);
return; locale_data.max_variant_size = max(mapping.alias.variants.size(), locale_data.max_variant_size);
locale_data.likely_subtags.append(move(mapping));
locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size);
locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size);
locale_data.likely_subtags.append(mapping.release_value());
}); });
return {}; return {};
@ -417,7 +415,7 @@ static ErrorOr<void> parse_default_content_locales(String core_path, UnicodeLoca
return {}; return {};
} }
static void define_aliases_without_scripts(UnicodeLocaleData& locale_data) static ErrorOr<void> define_aliases_without_scripts(UnicodeLocaleData& locale_data)
{ {
// From ECMA-402: https://tc39.es/ecma402/#sec-internal-slots // From ECMA-402: https://tc39.es/ecma402/#sec-internal-slots
// //
@ -431,40 +429,42 @@ static void define_aliases_without_scripts(UnicodeLocaleData& locale_data)
return locale_data.locale_aliases.find_if([&](auto const& alias) { return locale == alias.alias; }); return locale_data.locale_aliases.find_if([&](auto const& alias) { return locale == alias.alias; });
}; };
auto append_alias_without_script = [&](auto const& locale) { auto append_alias_without_script = [&](auto const& locale) -> ErrorOr<void> {
auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, locale); auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, locale));
VERIFY(parsed_locale.has_value()); if ((parsed_locale.language == 0) || (parsed_locale.script == 0) || (parsed_locale.region == 0))
return {};
if ((parsed_locale->language == 0) || (parsed_locale->script == 0) || (parsed_locale->region == 0))
return;
auto locale_without_script = String::formatted("{}-{}", auto locale_without_script = String::formatted("{}-{}",
locale_data.unique_strings.get(parsed_locale->language), locale_data.unique_strings.get(parsed_locale.language),
locale_data.unique_strings.get(parsed_locale->region)); locale_data.unique_strings.get(parsed_locale.region));
if (locale_data.locales.contains(locale_without_script)) if (locale_data.locales.contains(locale_without_script))
return; return {};
if (find_alias(locale_without_script) != locale_data.locale_aliases.end()) if (find_alias(locale_without_script) != locale_data.locale_aliases.end())
return; return {};
if (auto it = find_alias(locale); it != locale_data.locale_aliases.end()) if (auto it = find_alias(locale); it != locale_data.locale_aliases.end())
locale_data.locale_aliases.append({ it->name, locale_without_script }); locale_data.locale_aliases.append({ it->name, locale_without_script });
else else
locale_data.locale_aliases.append({ locale, locale_without_script }); locale_data.locale_aliases.append({ locale, locale_without_script });
return {};
}; };
for (auto const& locale : locale_data.locales) for (auto const& locale : locale_data.locales)
append_alias_without_script(locale.key); TRY(append_alias_without_script(locale.key));
for (auto const& locale : locale_data.locale_aliases) for (auto const& locale : locale_data.locale_aliases)
append_alias_without_script(locale.alias); TRY(append_alias_without_script(locale.alias));
return {};
} }
static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data) static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data)
{ {
auto identity_iterator = path_to_dir_iterator(locale_names_path); auto identity_iterator = TRY(path_to_dir_iterator(locale_names_path));
auto locale_names_iterator = path_to_dir_iterator(move(locale_names_path)); auto locale_names_iterator = TRY(path_to_dir_iterator(move(locale_names_path)));
auto misc_iterator = path_to_dir_iterator(move(misc_path)); auto misc_iterator = TRY(path_to_dir_iterator(move(misc_path)));
auto numbers_iterator = path_to_dir_iterator(move(numbers_path)); auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
LexicalPath core_supplemental_path(core_path); LexicalPath core_supplemental_path(core_path);
core_supplemental_path = core_supplemental_path.append("supplemental"sv); core_supplemental_path = core_supplemental_path.append("supplemental"sv);
@ -473,74 +473,56 @@ static ErrorOr<void> parse_all_locales(String core_path, String locale_names_pat
TRY(parse_core_aliases(core_supplemental_path.string(), locale_data)); TRY(parse_core_aliases(core_supplemental_path.string(), locale_data));
TRY(parse_likely_subtags(core_supplemental_path.string(), locale_data)); TRY(parse_likely_subtags(core_supplemental_path.string(), locale_data));
auto remove_variants_from_path = [&](String path) -> Optional<String> { auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)); auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
if (!parsed_locale.has_value())
return {};
StringBuilder builder; StringBuilder builder;
builder.append(locale_data.unique_strings.get(parsed_locale->language)); builder.append(locale_data.unique_strings.get(parsed_locale.language));
if (auto script = locale_data.unique_strings.get(parsed_locale->script); !script.is_empty()) if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
builder.appendff("-{}", script); builder.appendff("-{}", script);
if (auto region = locale_data.unique_strings.get(parsed_locale->region); !region.is_empty()) if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
builder.appendff("-{}", region); builder.appendff("-{}", region);
return builder.build(); return builder.build();
}; };
while (identity_iterator.has_next()) { while (identity_iterator.has_next()) {
auto locale_path = identity_iterator.next_full_path(); auto locale_path = TRY(next_path_from_dir_iterator(identity_iterator));
VERIFY(Core::File::is_directory(locale_path)); auto language = TRY(remove_variants_from_path(locale_path));
auto language = remove_variants_from_path(locale_path); auto& locale = locale_data.locales.ensure(language);
if (!language.has_value())
continue;
auto& locale = locale_data.locales.ensure(*language);
TRY(parse_identity(locale_path, locale_data, locale)); TRY(parse_identity(locale_path, locale_data, locale));
} }
while (locale_names_iterator.has_next()) { while (locale_names_iterator.has_next()) {
auto locale_path = locale_names_iterator.next_full_path(); auto locale_path = TRY(next_path_from_dir_iterator(locale_names_iterator));
VERIFY(Core::File::is_directory(locale_path)); auto language = TRY(remove_variants_from_path(locale_path));
auto language = remove_variants_from_path(locale_path); auto& locale = locale_data.locales.ensure(language);
if (!language.has_value())
continue;
auto& locale = locale_data.locales.ensure(*language);
TRY(parse_locale_languages(locale_path, locale_data, locale)); TRY(parse_locale_languages(locale_path, locale_data, locale));
TRY(parse_locale_territories(locale_path, locale_data, locale)); TRY(parse_locale_territories(locale_path, locale_data, locale));
TRY(parse_locale_scripts(locale_path, locale_data, locale)); TRY(parse_locale_scripts(locale_path, locale_data, locale));
} }
while (misc_iterator.has_next()) { while (misc_iterator.has_next()) {
auto misc_path = misc_iterator.next_full_path(); auto misc_path = TRY(next_path_from_dir_iterator(misc_iterator));
VERIFY(Core::File::is_directory(misc_path)); auto language = TRY(remove_variants_from_path(misc_path));
auto language = remove_variants_from_path(misc_path); auto& locale = locale_data.locales.ensure(language);
if (!language.has_value())
continue;
auto& locale = locale_data.locales.ensure(*language);
TRY(parse_locale_list_patterns(misc_path, locale_data, locale)); TRY(parse_locale_list_patterns(misc_path, locale_data, locale));
} }
while (numbers_iterator.has_next()) { while (numbers_iterator.has_next()) {
auto numbers_path = numbers_iterator.next_full_path(); auto numbers_path = TRY(next_path_from_dir_iterator(numbers_iterator));
VERIFY(Core::File::is_directory(numbers_path)); auto language = TRY(remove_variants_from_path(numbers_path));
auto language = remove_variants_from_path(numbers_path); auto& locale = locale_data.locales.ensure(language);
if (!language.has_value())
continue;
auto& locale = locale_data.locales.ensure(*language);
TRY(parse_locale_currencies(numbers_path, locale_data, locale)); TRY(parse_locale_currencies(numbers_path, locale_data, locale));
TRY(parse_numeric_keywords(numbers_path, locale_data, locale)); TRY(parse_numeric_keywords(numbers_path, locale_data, locale));
} }
TRY(parse_default_content_locales(move(core_path), locale_data)); TRY(parse_default_content_locales(move(core_path), locale_data));
define_aliases_without_scripts(locale_data); TRY(define_aliases_without_scripts(locale_data));
return {}; return {};
} }

View file

@ -445,45 +445,35 @@ static ErrorOr<void> parse_units(String locale_units_path, UnicodeLocaleData& lo
static ErrorOr<void> parse_all_locales(String numbers_path, String units_path, UnicodeLocaleData& locale_data) static ErrorOr<void> parse_all_locales(String numbers_path, String units_path, UnicodeLocaleData& locale_data)
{ {
auto numbers_iterator = path_to_dir_iterator(move(numbers_path)); auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
auto units_iterator = path_to_dir_iterator(move(units_path)); auto units_iterator = TRY(path_to_dir_iterator(move(units_path)));
auto remove_variants_from_path = [&](String path) -> Optional<String> { auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)); auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
if (!parsed_locale.has_value())
return {};
StringBuilder builder; StringBuilder builder;
builder.append(locale_data.unique_strings.get(parsed_locale->language)); builder.append(locale_data.unique_strings.get(parsed_locale.language));
if (auto script = locale_data.unique_strings.get(parsed_locale->script); !script.is_empty()) if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
builder.appendff("-{}", script); builder.appendff("-{}", script);
if (auto region = locale_data.unique_strings.get(parsed_locale->region); !region.is_empty()) if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
builder.appendff("-{}", region); builder.appendff("-{}", region);
return builder.build(); return builder.build();
}; };
while (numbers_iterator.has_next()) { while (numbers_iterator.has_next()) {
auto numbers_path = numbers_iterator.next_full_path(); auto numbers_path = TRY(next_path_from_dir_iterator(numbers_iterator));
VERIFY(Core::File::is_directory(numbers_path)); auto language = TRY(remove_variants_from_path(numbers_path));
auto language = remove_variants_from_path(numbers_path); auto& locale = locale_data.locales.ensure(language);
if (!language.has_value())
continue;
auto& locale = locale_data.locales.ensure(*language);
TRY(parse_number_systems(numbers_path, locale_data, locale)); TRY(parse_number_systems(numbers_path, locale_data, locale));
} }
while (units_iterator.has_next()) { while (units_iterator.has_next()) {
auto units_path = units_iterator.next_full_path(); auto units_path = TRY(next_path_from_dir_iterator(units_iterator));
VERIFY(Core::File::is_directory(units_path)); auto language = TRY(remove_variants_from_path(units_path));
auto language = remove_variants_from_path(units_path); auto& locale = locale_data.locales.ensure(language);
if (!language.has_value())
continue;
auto& locale = locale_data.locales.ensure(*language);
TRY(parse_units(units_path, locale_data, locale)); TRY(parse_units(units_path, locale_data, locale));
} }

View file

@ -94,7 +94,7 @@ struct Alias {
template<typename StringIndexType> template<typename StringIndexType>
struct CanonicalLanguageID { struct CanonicalLanguageID {
static Optional<CanonicalLanguageID> parse(UniqueStringStorage<StringIndexType>& unique_strings, StringView language) static ErrorOr<CanonicalLanguageID> parse(UniqueStringStorage<StringIndexType>& unique_strings, StringView language)
{ {
CanonicalLanguageID language_id {}; CanonicalLanguageID language_id {};
@ -107,7 +107,7 @@ struct CanonicalLanguageID {
if (segments.size() == ++index) if (segments.size() == ++index)
return language_id; return language_id;
} else { } else {
return {}; return Error::from_string_literal("Expected language subtag"sv);
} }
if (Unicode::is_unicode_script_subtag(segments[index])) { if (Unicode::is_unicode_script_subtag(segments[index])) {
@ -124,7 +124,7 @@ struct CanonicalLanguageID {
while (index < segments.size()) { while (index < segments.size()) {
if (!Unicode::is_unicode_variant_subtag(segments[index])) if (!Unicode::is_unicode_variant_subtag(segments[index]))
return {}; return Error::from_string_literal("Expected variant subtag"sv);
language_id.variants.append(unique_strings.ensure(segments[index++])); language_id.variants.append(unique_strings.ensure(segments[index++]));
} }
@ -137,21 +137,27 @@ struct CanonicalLanguageID {
Vector<StringIndexType> variants {}; Vector<StringIndexType> variants {};
}; };
inline Core::DirIterator path_to_dir_iterator(String path) inline ErrorOr<Core::DirIterator> path_to_dir_iterator(String path)
{ {
LexicalPath lexical_path(move(path)); LexicalPath lexical_path(move(path));
lexical_path = lexical_path.append("main"sv); lexical_path = lexical_path.append("main"sv);
VERIFY(Core::File::is_directory(lexical_path.string()));
Core::DirIterator iterator(lexical_path.string(), Core::DirIterator::SkipParentAndBaseDir); Core::DirIterator iterator(lexical_path.string(), Core::DirIterator::SkipParentAndBaseDir);
if (iterator.has_error()) { if (iterator.has_error())
warnln("{}: {}", lexical_path.string(), iterator.error_string()); return Error::from_string_literal(iterator.error_string());
VERIFY_NOT_REACHED();
}
return iterator; return iterator;
} }
// Fetches the next full path from `iterator`. After advancing, the iterator's
// error state is checked: when no error is reported the path is returned,
// otherwise the iterator's error string is wrapped in an Error and returned.
inline ErrorOr<String> next_path_from_dir_iterator(Core::DirIterator& iterator)
{
    auto full_path = iterator.next_full_path();

    // Success is the common case, so handle it first and fall through to the failure path.
    if (!iterator.has_error())
        return full_path;

    return Error::from_string_literal(iterator.error_string());
}
inline void ensure_from_string_types_are_generated(SourceGenerator& generator) inline void ensure_from_string_types_are_generated(SourceGenerator& generator)
{ {
static bool generated_from_string_types = false; static bool generated_from_string_types = false;