mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 04:08:11 +00:00
AK+LibUnicode: Provide Unicode-aware caseless String matching
The Unicode spec defines much more complicated caseless matching algorithms in its Collation spec. This implements the "basic" case folding comparison.
This commit is contained in:
parent
8f2589b3b0
commit
537fcaf59e
3 changed files with 79 additions and 0 deletions
|
@ -49,6 +49,10 @@ public:
|
||||||
ErrorOr<String> to_lowercase(Optional<StringView> const& locale = {}) const;
|
ErrorOr<String> to_lowercase(Optional<StringView> const& locale = {}) const;
|
||||||
ErrorOr<String> to_uppercase(Optional<StringView> const& locale = {}) const;
|
ErrorOr<String> to_uppercase(Optional<StringView> const& locale = {}) const;
|
||||||
ErrorOr<String> to_titlecase(Optional<StringView> const& locale = {}) const;
|
ErrorOr<String> to_titlecase(Optional<StringView> const& locale = {}) const;
|
||||||
|
// Creates a case-folded copy of this String. The folding itself is performed by LibUnicode, so using this method requires linking LibUnicode into your application.
ErrorOr<String> to_casefold() const;
|
||||||
|
|
||||||
|
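// Compare this String against another string with Unicode caseless matching: the two match when their case-folded forms are equal (so U+00DF LATIN SMALL LETTER SHARP S matches "SS"). Using this method requires linking LibUnicode into your application.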
|
||||||
|
ErrorOr<bool> equals_ignoring_case(String const&) const;
|
||||||
|
|
||||||
// Creates a substring with a deep copy of the specified data window.
|
// Creates a substring with a deep copy of the specified data window.
|
||||||
ErrorOr<String> substring_from_byte_offset(size_t start, size_t byte_count) const;
|
ErrorOr<String> substring_from_byte_offset(size_t start, size_t byte_count) const;
|
||||||
|
|
|
@ -187,6 +187,66 @@ TEST_CASE(to_titlecase)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(equals_ignoring_case)
{
    // Performs one caseless comparison; the comparison itself is never expected to produce an error here.
    auto matches = [](String const& lhs, String const& rhs) {
        return MUST(lhs.equals_ignoring_case(rhs));
    };

    // Two default-constructed strings match each other.
    {
        String first {};
        String second {};

        EXPECT(matches(first, second));
    }

    // ASCII: any mix of upper and lower case matches; a different letter order does not.
    {
        auto lower = MUST(String::from_utf8("abcd"sv));
        auto upper = MUST(String::from_utf8("ABCD"sv));
        auto mixed = MUST(String::from_utf8("AbCd"sv));
        auto reversed = MUST(String::from_utf8("dcba"sv));

        EXPECT(matches(lower, upper));
        EXPECT(matches(lower, mixed));
        EXPECT(!matches(lower, reversed));

        EXPECT(matches(upper, lower));
        EXPECT(matches(upper, mixed));
        EXPECT(!matches(upper, reversed));

        EXPECT(matches(mixed, lower));
        EXPECT(matches(mixed, upper));
        EXPECT(!matches(mixed, reversed));
    }

    // Sharp S matches every casing of "ss", but never a single "s".
    {
        auto sharp_s = MUST(String::from_utf8("\u00DF"sv)); // LATIN SMALL LETTER SHARP S
        auto upper_pair = MUST(String::from_utf8("SS"sv));
        auto mixed_pair = MUST(String::from_utf8("Ss"sv));
        auto lower_pair = MUST(String::from_utf8("ss"sv));
        auto upper_single = MUST(String::from_utf8("S"sv));
        auto lower_single = MUST(String::from_utf8("s"sv));

        EXPECT(matches(sharp_s, upper_pair));
        EXPECT(matches(sharp_s, mixed_pair));
        EXPECT(matches(sharp_s, lower_pair));
        EXPECT(!matches(sharp_s, upper_single));
        EXPECT(!matches(sharp_s, lower_single));

        EXPECT(matches(upper_pair, sharp_s));
        EXPECT(matches(upper_pair, mixed_pair));
        EXPECT(matches(upper_pair, lower_pair));
        EXPECT(!matches(upper_pair, upper_single));
        EXPECT(!matches(upper_pair, lower_single));

        EXPECT(matches(mixed_pair, sharp_s));
        EXPECT(matches(mixed_pair, upper_pair));
        EXPECT(matches(mixed_pair, lower_pair));
        EXPECT(!matches(mixed_pair, upper_single));
        EXPECT(!matches(mixed_pair, lower_single));

        EXPECT(matches(lower_pair, sharp_s));
        EXPECT(matches(lower_pair, upper_pair));
        EXPECT(matches(lower_pair, mixed_pair));
        EXPECT(!matches(lower_pair, upper_single));
        EXPECT(!matches(lower_pair, lower_single));
    }
}
|
||||||
|
|
||||||
TEST_CASE(is_one_of)
|
TEST_CASE(is_one_of)
|
||||||
{
|
{
|
||||||
auto foo = MUST(String::from_utf8("foo"sv));
|
auto foo = MUST(String::from_utf8("foo"sv));
|
||||||
|
|
|
@ -33,4 +33,19 @@ ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale) const
|
||||||
return builder.to_string();
|
return builder.to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a new String from this String's code points by running them through
// LibUnicode's case folding. Any error from the folding step or from
// materializing the result is propagated to the caller.
ErrorOr<String> String::to_casefold() const
{
    StringBuilder folded;
    auto const source = code_points();

    TRY(Unicode::Detail::build_casefold_string(source, folded));
    return folded.to_string();
}
|
||||||
|
|
||||||
|
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145
|
||||||
|
ErrorOr<bool> String::equals_ignoring_case(String const& other) const
|
||||||
|
{
|
||||||
|
// A string X is a caseless match for a string Y if and only if:
|
||||||
|
// toCasefold(X) = toCasefold(Y)
|
||||||
|
return TRY(to_casefold()) == TRY(other.to_casefold());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue