From 6fcc1c742647f05fc65264290622f67b9a6ec745 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sun, 8 Jan 2023 16:33:30 -0500 Subject: [PATCH] AK+LibUnicode: Provide Unicode-aware String case transformations Since AK can't refer to LibUnicode directly, the strategy here is that if you need case transformations, you can link LibUnicode and receive them. If you try to use either of these methods without linking it, then you'll of course get a linker error (note we don't do any fallbacks to e.g. ASCII case transformations). If you don't need these methods, you don't have to link LibUnicode. --- AK/String.h | 6 ++++ Meta/Lagom/CMakeLists.txt | 1 + Tests/AK/CMakeLists.txt | 2 ++ Tests/AK/TestString.cpp | 38 ++++++++++++++++++++ Userland/Libraries/LibUnicode/CMakeLists.txt | 1 + Userland/Libraries/LibUnicode/String.cpp | 29 +++++++++++++++ 6 files changed, 77 insertions(+) create mode 100644 Userland/Libraries/LibUnicode/String.cpp diff --git a/AK/String.h b/AK/String.h index 01c4c3cc1b..7a8eecce15 100644 --- a/AK/String.h +++ b/AK/String.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,11 @@ public: // Creates a new String from a sequence of UTF-8 encoded code points. static ErrorOr from_utf8(StringView); + // Creates a new String by transforming this String to lower- or uppercase. Using these methods + // requires linking LibUnicode into your application. + ErrorOr to_lowercase(Optional const& locale = {}) const; + ErrorOr to_uppercase(Optional const& locale = {}) const; + // Creates a substring with a deep copy of the specified data window. 
ErrorOr substring_from_byte_offset(size_t start, size_t byte_count) const; diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index 152981f46f..e9c2f4c01c 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -558,6 +558,7 @@ if (BUILD_LAGOM) foreach(source ${AK_TEST_SOURCES}) lagom_test(${source} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../Tests/AK) endforeach() + target_link_libraries(TestString LibUnicode) # LibAudio file(GLOB LIBAUDIO_TEST_SOURCES CONFIGURE_DEPENDS "../../Tests/LibAudio/*.cpp") diff --git a/Tests/AK/CMakeLists.txt b/Tests/AK/CMakeLists.txt index e7e0c0855f..957304cb77 100644 --- a/Tests/AK/CMakeLists.txt +++ b/Tests/AK/CMakeLists.txt @@ -86,3 +86,5 @@ set(AK_TEST_SOURCES foreach(source IN LISTS AK_TEST_SOURCES) serenity_test("${source}" AK) endforeach() + +target_link_libraries(TestString PRIVATE LibUnicode) diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp index 3cc88317a9..3faaa3b654 100644 --- a/Tests/AK/TestString.cpp +++ b/Tests/AK/TestString.cpp @@ -107,3 +107,41 @@ TEST_CASE(replace) EXPECT_EQ(result, "anon@courage:~"sv); } } + +TEST_CASE(to_lowercase) +{ + { + auto string = MUST(String::from_utf8("Aa"sv)); + auto result = MUST(string.to_lowercase()); + EXPECT_EQ(result, "aa"sv); + } + { + auto string = MUST(String::from_utf8("Ωω"sv)); + auto result = MUST(string.to_lowercase()); + EXPECT_EQ(result, "ωω"sv); + } + { + auto string = MUST(String::from_utf8("İi̇"sv)); + auto result = MUST(string.to_lowercase()); + EXPECT_EQ(result, "i̇i̇"sv); + } +} + +TEST_CASE(to_uppercase) +{ + { + auto string = MUST(String::from_utf8("Aa"sv)); + auto result = MUST(string.to_uppercase()); + EXPECT_EQ(result, "AA"sv); + } + { + auto string = MUST(String::from_utf8("Ωω"sv)); + auto result = MUST(string.to_uppercase()); + EXPECT_EQ(result, "ΩΩ"sv); + } + { + auto string = MUST(String::from_utf8("ʼn"sv)); + auto result = MUST(string.to_uppercase()); + EXPECT_EQ(result, "ʼN"sv); + } +} diff --git 
a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index 109307cad9..27f9201815 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -5,6 +5,7 @@ set(SOURCES CurrencyCode.cpp Emoji.cpp Normalize.cpp + String.cpp UnicodeUtils.cpp ${UNICODE_DATA_SOURCES} ) diff --git a/Userland/Libraries/LibUnicode/String.cpp b/Userland/Libraries/LibUnicode/String.cpp new file mode 100644 index 0000000000..4cbc47d5ad --- /dev/null +++ b/Userland/Libraries/LibUnicode/String.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +// This file contains definitions of AK::String methods which require UCD data. + +namespace AK { + +ErrorOr String::to_lowercase(Optional const& locale) const +{ + StringBuilder builder; + TRY(Unicode::Detail::build_lowercase_string(code_points(), builder, locale)); + return builder.to_string(); +} + +ErrorOr String::to_uppercase(Optional const& locale) const +{ + StringBuilder builder; + TRY(Unicode::Detail::build_uppercase_string(code_points(), builder, locale)); + return builder.to_string(); +} + +}