From 3c2565da9427aa245d23c7e7a9ebfeec59f02cf7 Mon Sep 17 00:00:00 2001
From: Max Wipfli
Date: Thu, 20 May 2021 12:56:38 +0200
Subject: [PATCH] AK: Add UnicodeUtils with Unicode-related helper functions

This introduces the UnicodeUtils file, which contains helper functions
related to Unicode. This is in contrast to StringUtils, whose functions
are not directly related to Unicode and are, in theory,
encoding-agnostic.
---
 AK/UnicodeUtils.cpp | 37 +++++++++++++++++++++++++++++++++++++
 AK/UnicodeUtils.h   | 20 ++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 AK/UnicodeUtils.cpp
 create mode 100644 AK/UnicodeUtils.h

diff --git a/AK/UnicodeUtils.cpp b/AK/UnicodeUtils.cpp
new file mode 100644
index 0000000000..2db467065c
--- /dev/null
+++ b/AK/UnicodeUtils.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021, Max Wipfli
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include // NOTE(review): the operands of these four #include directives are missing here, presumably stripped during extraction; restore them from the original commit.
+#include
+#include
+#include
+
+namespace AK::UnicodeUtils {
+
+Optional get_unicode_control_code_point_alias(u32 code_point) // Looks up the abbreviation string of a control code point; yields an empty result for every code point outside the two table ranges. NOTE(review): the template arguments of Optional and Array appear to be missing as well; confirm against the original commit.
+{
+    static constexpr Array ascii_controls_lookup_table = { // 32 abbreviations for code points 0x00-0x1F, indexed directly by the code point.
+        "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+        "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
+        "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+        "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"
+    };
+
+    static constexpr Array c1_controls_lookup_table = { // 32 abbreviations for code points 0x80-0x9F, indexed by (code_point - 0x80). The XXX entries (0x80, 0x81, 0x99) are presumably placeholders for code points without an abbreviation; TODO confirm.
+        "XXX", "XXX", "BPH", "NBH", "IND", "NEL", "SSA", "ESA",
+        "HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3",
+        "DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA",
+        "SOS", "XXX", "SCI", "CSI", "ST", "OSC", "PM", "APC"
+    };
+
+    if (code_point < 0x20) // Lower range: the code point itself is a valid index into the 32-entry table.
+        return ascii_controls_lookup_table[code_point];
+    if (code_point >= 0x80 && code_point < 0xa0) // Upper range: rebase onto 0x80 so the index stays within 0..31.
+        return c1_controls_lookup_table[code_point - 0x80];
+    return {}; // Code point is in neither range, so there is no abbreviation to return.
+}
+
+}
diff --git a/AK/UnicodeUtils.h b/AK/UnicodeUtils.h
new file mode 100644
index 0000000000..e7211deaea
--- /dev/null
+++ b/AK/UnicodeUtils.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2021, Max Wipfli
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include // NOTE(review): the operand of this #include is missing here, presumably stripped during extraction; restore it from the original commit.
+
+namespace AK::UnicodeUtils {
+
+constexpr bool is_unicode_control_code_point(u32 code_point) // True exactly for code points in 0x00-0x1F or 0x80-0x9F; note that 0x7F yields false.
+{
+    return code_point < 0x20 || (code_point >= 0x80 && code_point < 0xa0);
+}
+
+Optional get_unicode_control_code_point_alias(u32); // Abbreviation lookup covering the same two ranges; defined in AK/UnicodeUtils.cpp.
+
+}