1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 17:07:34 +00:00

LibUnicode: Add decomposition mappings and Unicode normalization

The mappings are exposed via `Unicode::code_point_decomposition(u32)`
and `Unicode::code_point_decompositions()`, the latter being useful for
reverse searching a code point from its decomposition.

The normalization code does not make use of the `Quick_Check` properties (https://www.unicode.org/reports/tr44/#Decompositions_and_Normalization),
so no quick-check optimizations are performed.
This commit is contained in:
matcool 2022-10-02 22:57:22 -03:00 committed by Tim Flynn
parent e8410bc2ee
commit 70d0c1616f
5 changed files with 392 additions and 2 deletions

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2022, mat
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Forward.h>
#include <AK/Optional.h>
#include <AK/Span.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <LibUnicode/Forward.h>
namespace Unicode {

// Selects which Unicode normalization form normalize() should produce.
// The enumerator names follow the form names used by the Unicode standard.
enum class NormalizationForm {
    NFD,
    NFC,
    NFKD,
    NFKC,
};

// Looks up the decomposition mapping of a single code point.
// NOTE(review): an empty Optional presumably means the code point has no
// decomposition mapping - confirm against the implementation.
Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point);

// Exposes the decomposition mappings as a read-only span. This is useful for
// reverse searching a code point from its decomposition.
Span<CodePointDecomposition const> code_point_decompositions();

// Normalizes `string` according to `form` and returns the result as a new String.
// The normalization code does not make use of the Quick_Check properties, so
// no quick-check optimizations are performed.
[[nodiscard]] String normalize(StringView string, NormalizationForm form);

}