mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 06:07:44 +00:00
LibUnicode: Implement grammar validators for Unicode TR-35
ECMA-402 requires validating user input against the EBNF grammar for Unicode locales described in TR-35 (https://www.unicode.org/reports/tr35). This commit adds validators for that grammar, as well as other helpers, e.g. to canonicalize a locale string.
This commit is contained in:
parent
3127454642
commit
b7a95cba65
4 changed files with 313 additions and 0 deletions
40
Userland/Libraries/LibUnicode/Locale.h
Normal file
40
Userland/Libraries/LibUnicode/Locale.h
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibUnicode/Forward.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
struct LanguageID {
|
||||
bool is_root { false };
|
||||
Optional<StringView> language {};
|
||||
Optional<StringView> script {};
|
||||
Optional<StringView> region {};
|
||||
Vector<StringView> variants {};
|
||||
};
|
||||
|
||||
struct LocaleID {
|
||||
LanguageID language_id {};
|
||||
};
|
||||
|
||||
// Note: These methods only verify that the provided strings match the EBNF grammar of the
|
||||
// Unicode identifier subtag (i.e. no validation is done that the tags actually exist).
|
||||
bool is_unicode_language_subtag(StringView);
|
||||
bool is_unicode_script_subtag(StringView);
|
||||
bool is_unicode_region_subtag(StringView);
|
||||
bool is_unicode_variant_subtag(StringView);
|
||||
|
||||
Optional<LanguageID> parse_unicode_language_id(StringView);
|
||||
Optional<LocaleID> parse_unicode_locale_id(StringView);
|
||||
Optional<String> canonicalize_unicode_locale_id(LocaleID&);
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue