mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 17:07:34 +00:00
LibUnicode: Add decomposition mappings and Unicode normalization
The mappings are exposed via `Unicode::code_point_decomposition(u32)` and `Unicode::code_point_decompositions()`; the latter is useful for reverse-searching a code point from its decomposition. The normalization code does not make use of the `Quick_Check` properties (https://www.unicode.org/reports/tr44/#Decompositions_and_Normalization), so no quick-check optimizations are performed.
This commit is contained in:
parent
e8410bc2ee
commit
70d0c1616f
5 changed files with 392 additions and 2 deletions
30
Userland/Libraries/LibUnicode/Normalize.h
Normal file
30
Userland/Libraries/LibUnicode/Normalize.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright (c) 2022, mat
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/Span.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <LibUnicode/Forward.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Looks up the decomposition mapping for `code_point`.
// Returns an Optional holding a const reference to the mapping; presumably empty when
// the code point has no decomposition mapping (confirm against the implementation).
Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point);
|
||||
// Returns a read-only span of decomposition mappings. Per the introducing commit, this is
// useful for reverse-searching a code point from its decomposition.
Span<CodePointDecomposition const> code_point_decompositions();
|
||||
|
||||
// Selects the Unicode normalization form passed to normalize().
// Enumerator names follow UAX #15 terminology: NFD and NFC are the canonical forms
// (decomposed and composed, respectively); NFKD and NFKC are the compatibility forms
// (decomposed and composed, respectively).
enum class NormalizationForm {
|
||||
NFD,
|
||||
NFC,
|
||||
NFKD,
|
||||
NFKC
|
||||
};
|
||||
|
||||
// Normalizes `string` according to `form` and returns the result as a new String.
// The result is [[nodiscard]]: the input is taken as a StringView, so the normalized
// text is only available through the return value.
[[nodiscard]] String normalize(StringView string, NormalizationForm form);
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue