1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-16 18:15:07 +00:00
serenity/Userland/Libraries/LibUnicode/Locale.h
Timothy Flynn 1fbc5dba08 LibUnicode: Generate Unicode locale likely subtag data
CLDR contains a set of likely subtag data where, given a locale, you can
resolve what is the most likely language, script, or territory of that
locale. This data is needed for resolving territory aliases. These
aliases might contain multiple territories, and we need to resolve which
of those territories is most likely correct for a locale.

Note that the likely subtag data is quite huge (a few thousand entries).
As an optimization encouraged by the spec, we only generate the smallest
subset of this data that we actually need (about 150 entries).
2021-09-01 14:14:47 +01:00

86 lines
2.5 KiB
C++

/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Optional.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Variant.h>
#include <AK/Vector.h>
#include <LibUnicode/Forward.h>
namespace Unicode {
// The components of a Unicode language identifier. This is the result type of
// parse_unicode_language_id() and is embedded in LocaleID and TransformedExtension.
// Every member has a default initializer, so a default-constructed LanguageID has
// is_root == false, no language/script/region, and no variants.
struct LanguageID {
// NOTE(review): presumably set when the identifier is the special "root" locale - confirm in the parser.
bool is_root { false };
// Language subtag; empty Optional when not present.
Optional<String> language {};
// Script subtag; empty Optional when not present.
Optional<String> script {};
// Region subtag; empty Optional when not present.
Optional<String> region {};
// Variant subtags; may be empty.
Vector<String> variants {};
};
// A single keyword stored in LocaleExtension::keywords: one key together with the
// list of type values associated with it.
// NOTE(review): presumably corresponds to a "key type*" pair of the Unicode locale ("u") extension - confirm.
struct Keyword {
String key {};
Vector<String> types {};
};
// One of the alternatives held by the Extension variant: a list of attributes plus a
// list of Keyword entries.
// NOTE(review): presumably models the Unicode locale ("u") extension - confirm.
struct LocaleExtension {
Vector<String> attributes {};
Vector<Keyword> keywords {};
};
// A single field stored in TransformedExtension::fields: one key together with the
// list of values associated with it.
// NOTE(review): presumably corresponds to a "tkey tvalue" pair of the transformed ("t") extension - confirm.
struct TransformedField {
    // Default member initializer added so that every member of every aggregate in this
    // header is explicitly initialized (all sibling structs already use `{}`); this also
    // keeps partial / designated initialization free of missing-initializer warnings.
    String key {};
    Vector<String> values {};
};
// One of the alternatives held by the Extension variant: an optional LanguageID plus a
// list of TransformedField entries.
// NOTE(review): presumably models the transformed-content ("t") extension - confirm.
struct TransformedExtension {
// Empty Optional when the extension carries no language identifier.
Optional<LanguageID> language {};
Vector<TransformedField> fields {};
};
// One of the alternatives held by the Extension variant: a single-character key plus a
// list of values.
// NOTE(review): presumably used for any extension singleton other than the locale and
// transformed extensions - confirm in the parser.
struct OtherExtension {
// Value-initialized, i.e. '\0' until set.
char key {};
Vector<String> values {};
};
// An extension stored in LocaleID::extensions; holds exactly one of the three extension structs.
using Extension = Variant<LocaleExtension, TransformedExtension, OtherExtension>;
// A complete Unicode locale identifier: the language identifier, any extensions, and any
// private-use extension subtags. This is the result type of parse_unicode_locale_id()
// and the argument type of canonicalize_unicode_locale_id().
struct LocaleID {
LanguageID language_id {};
Vector<Extension> extensions {};
Vector<String> private_use_extensions {};
};
// Note: These methods only verify that the provided strings match the EBNF grammar of the
// Unicode identifier subtag (i.e. no validation is done that the tags actually exist).
bool is_unicode_language_subtag(StringView);
bool is_unicode_script_subtag(StringView);
bool is_unicode_region_subtag(StringView);
bool is_unicode_variant_subtag(StringView);
// Parsing. Both functions return an empty Optional rather than a partially filled result;
// NOTE(review): presumably empty means the input did not match the grammar - confirm in the implementation.
Optional<LanguageID> parse_unicode_language_id(StringView);
Optional<LocaleID> parse_unicode_locale_id(StringView);
// Takes the LocaleID by non-const reference, so the argument may be modified by the call.
// NOTE(review): presumably returns the canonical string form, or empty on failure - confirm.
Optional<String> canonicalize_unicode_locale_id(LocaleID&);
// Returns a reference to the default locale string; the referenced String is owned by the
// implementation, not the caller.
String const& default_locale();
// NOTE(review): presumably reports whether generated locale data exists for `locale` - confirm.
bool is_locale_available(StringView locale);
// Per-locale lookups: given a locale and a language/territory/script/currency identifier,
// return the mapped StringView, or an empty Optional.
// NOTE(review): presumably the result is the localized display name and empty means the
// locale or identifier is unknown - confirm against the generated data.
Optional<StringView> get_locale_language_mapping(StringView locale, StringView language);
Optional<StringView> get_locale_territory_mapping(StringView locale, StringView territory);
Optional<StringView> get_locale_script_mapping(StringView locale, StringView script);
Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency);
// Alias resolution: return the replacement for the given subtag, or an empty Optional.
// NOTE(review): presumably empty means no alias is defined for the input - confirm.
Optional<StringView> resolve_language_alias(StringView language);
Optional<StringView> resolve_territory_alias(StringView territory);
Optional<StringView> resolve_script_tag_alias(StringView script_tag);
Optional<StringView> resolve_variant_alias(StringView variant);
Optional<StringView> resolve_subdivision_alias(StringView subdivision);
// A territory alias may name several territories; this selects one of them for the given
// language identifier.
// NOTE(review): presumably uses the generated likely-subtag data to pick the most likely
// territory; the fallback when no likely subtag matches is not visible here - confirm.
String resolve_most_likely_territory(LanguageID const& language_id, StringView territory_alias);
}