mirror of
https://github.com/RGBCube/serenity
synced 2025-05-29 17:05:06 +00:00
AK: Make String::matches() capable of reporting match positions too
Also, rewrite StringUtils::matches(), because the old implementation was fairly broken, e.g. "acdcxb" would *not* match "a*?b".
This commit is contained in:
parent
2d6d1ca67f
commit
0801b1fada
7 changed files with 86 additions and 33 deletions
|
@ -270,6 +270,11 @@ String String::repeated(char ch, size_t count)
|
||||||
return *impl;
|
return *impl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Span-reporting overload of String::matches(): forwards this string, `mask` and
// `case_sensitivity` to StringUtils::matches(), passing the address of `mask_spans`
// so the matcher can append the positions it records. Returns the matcher's result.
bool String::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
|
||||||
|
{
|
||||||
|
return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
|
||||||
|
}
|
||||||
|
|
||||||
bool String::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
|
bool String::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
|
||||||
{
|
{
|
||||||
return StringUtils::matches(*this, mask, case_sensitivity);
|
return StringUtils::matches(*this, mask, case_sensitivity);
|
||||||
|
|
|
@ -112,6 +112,7 @@ public:
|
||||||
|
|
||||||
static String repeated(char, size_t count);
|
static String repeated(char, size_t count);
|
||||||
bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
|
bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
|
||||||
|
bool matches(const StringView& mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
|
||||||
|
|
||||||
Optional<int> to_int() const;
|
Optional<int> to_int() const;
|
||||||
Optional<unsigned> to_uint() const;
|
Optional<unsigned> to_uint() const;
|
||||||
|
|
|
@ -30,62 +30,70 @@
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
#include <AK/StringUtils.h>
|
#include <AK/StringUtils.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
#include <AK/Vector.h>
|
||||||
|
|
||||||
namespace AK {
|
namespace AK {
|
||||||
|
|
||||||
namespace StringUtils {
|
namespace StringUtils {
|
||||||
|
|
||||||
// Side-by-side diff rendering of StringUtils::matches(): old-side and new-side lines alternate,
// separated by `|` / `||||||` marker lines, so this span is not compilable as written.
// Both sides: if either `str` or `mask` is null, the result is true only when both are null;
// a CaseInsensitive request lowercases both inputs and re-enters matches() as CaseSensitive.
// Old side: walks string and mask in lockstep and remembers the last `*` (mp/cp) to backtrack to.
// New side: adds the optional `match_spans` out-parameter; record_span() appends { start, length }
// for the text consumed by `*` and `?` when match_spans is non-null. A `*` is resolved by advancing
// through the string until a recursive matches() call on the remainder succeeds.
bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity)
|
bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans)
|
||||||
{
|
{
|
||||||
|
auto record_span = [&match_spans](size_t start, size_t length) {
|
||||||
|
if (match_spans)
|
||||||
|
match_spans->append({ start, length });
|
||||||
|
};
|
||||||
|
|
||||||
if (str.is_null() || mask.is_null())
|
if (str.is_null() || mask.is_null())
|
||||||
return str.is_null() && mask.is_null();
|
return str.is_null() && mask.is_null();
|
||||||
|
|
||||||
|
if (mask == "*") {
|
||||||
|
record_span(0, str.length());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
|
if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
|
||||||
const String str_lower = String(str).to_lowercase();
|
const String str_lower = String(str).to_lowercase();
|
||||||
const String mask_lower = String(mask).to_lowercase();
|
const String mask_lower = String(mask).to_lowercase();
|
||||||
return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive);
|
return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive, match_spans);
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* string_ptr = str.characters_without_null_termination();
|
const char* string_ptr = str.characters_without_null_termination();
|
||||||
|
const char* string_start = str.characters_without_null_termination();
|
||||||
const char* string_end = string_ptr + str.length();
|
const char* string_end = string_ptr + str.length();
|
||||||
const char* mask_ptr = mask.characters_without_null_termination();
|
const char* mask_ptr = mask.characters_without_null_termination();
|
||||||
const char* mask_end = mask_ptr + mask.length();
|
const char* mask_end = mask_ptr + mask.length();
|
||||||
|
|
||||||
// Match string against mask directly unless we hit a *
|
auto matches_one = [](char ch, char p) {
|
||||||
while ((string_ptr < string_end) && (mask_ptr < mask_end) && (*mask_ptr != '*')) {
|
if (p == '?')
|
||||||
if ((*mask_ptr != *string_ptr) && (*mask_ptr != '?'))
|
|
||||||
return false;
|
|
||||||
mask_ptr++;
|
|
||||||
string_ptr++;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* cp = nullptr;
|
|
||||||
const char* mp = nullptr;
|
|
||||||
|
|
||||||
while (string_ptr < string_end) {
|
|
||||||
if ((mask_ptr < mask_end) && (*mask_ptr == '*')) {
|
|
||||||
// If we have only a * left, there is no way to not match.
|
|
||||||
if (++mask_ptr == mask_end)
|
|
||||||
return true;
|
return true;
|
||||||
mp = mask_ptr;
|
return p == ch && ch != 0;
|
||||||
cp = string_ptr + 1;
|
};
|
||||||
} else if ((mask_ptr < mask_end) && ((*mask_ptr == *string_ptr) || (*mask_ptr == '?'))) {
|
while (string_ptr < string_end && mask_ptr < mask_end) {
|
||||||
mask_ptr++;
|
auto string_start_ptr = string_ptr;
|
||||||
string_ptr++;
|
switch (*mask_ptr) {
|
||||||
} else if ((cp != nullptr) && (mp != nullptr)) {
|
case '*':
|
||||||
mask_ptr = mp;
|
// NOTE(review): new side - reads mask_ptr[1] and compares it with 0, although mask_ptr comes from
// characters_without_null_termination(); presumably relies on a NUL byte following the mask - confirm.
if (mask_ptr[1] == 0) {
|
||||||
string_ptr = cp++;
|
record_span(string_ptr - string_start, string_end - string_ptr);
|
||||||
} else {
|
return true;
|
||||||
|
}
|
||||||
|
// NOTE(review): new side - the recursive call passes raw char pointers and omits the last two arguments,
// so per the declaration's defaults it runs CaseInsensitive with no span recording; presumably the
// pointers must be NUL-terminated to convert to StringView - confirm.
while (string_ptr < string_end && !matches(string_ptr, mask_ptr + 1))
|
||||||
|
++string_ptr;
|
||||||
|
record_span(string_start_ptr - string_start, string_ptr - string_start_ptr);
|
||||||
|
--string_ptr;
|
||||||
|
break;
|
||||||
|
case '?':
|
||||||
|
record_span(string_ptr - string_start, 1);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (!matches_one(*string_ptr, *mask_ptr))
|
||||||
|
return false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
++string_ptr;
|
||||||
|
++mask_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle any trailing mask
|
return string_ptr == string_end && mask_ptr == mask_end;
|
||||||
while ((mask_ptr < mask_end) && (*mask_ptr == '*'))
|
|
||||||
mask_ptr++;
|
|
||||||
|
|
||||||
// If we 'ate' all of the mask and the string then we match.
|
|
||||||
return (mask_ptr == mask_end) && string_ptr == string_end;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<int> convert_to_int(const StringView& str)
|
Optional<int> convert_to_int(const StringView& str)
|
||||||
|
|
|
@ -42,9 +42,23 @@ enum class TrimMode {
|
||||||
Both
|
Both
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A (start, length) pair describing one region recorded by StringUtils::matches()
// for the text consumed by a `*` or `?` in the mask.
struct MaskSpan {
|
||||||
|
// Offset of the region, measured from the beginning of the matched string.
size_t start;
|
||||||
|
// Number of characters in the region.
size_t length;
|
||||||
|
|
||||||
|
// Two spans are equal when both their start and their length are equal.
bool operator==(const MaskSpan& other) const
|
||||||
|
{
|
||||||
|
return start == other.start && length == other.length;
|
||||||
|
}
|
||||||
|
// Negation of operator==.
bool operator!=(const MaskSpan& other) const
|
||||||
|
{
|
||||||
|
return !(*this == other);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
namespace StringUtils {
|
namespace StringUtils {
|
||||||
|
|
||||||
bool matches(const StringView& str, const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive);
|
bool matches(const StringView& str, const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive, Vector<MaskSpan>* match_spans = nullptr);
|
||||||
Optional<int> convert_to_int(const StringView&);
|
Optional<int> convert_to_int(const StringView&);
|
||||||
Optional<unsigned> convert_to_uint(const StringView&);
|
Optional<unsigned> convert_to_uint(const StringView&);
|
||||||
Optional<unsigned> convert_to_uint_from_hex(const StringView&);
|
Optional<unsigned> convert_to_uint_from_hex(const StringView&);
|
||||||
|
|
|
@ -164,6 +164,11 @@ bool StringView::ends_with(const StringView& str, CaseSensitivity case_sensitivi
|
||||||
return StringUtils::ends_with(*this, str, case_sensitivity);
|
return StringUtils::ends_with(*this, str, case_sensitivity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Span-reporting overload of StringView::matches(): forwards this view, `mask` and
// `case_sensitivity` to StringUtils::matches(), passing the address of `mask_spans`
// so the matcher can append the positions it records. Returns the matcher's result.
bool StringView::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
|
||||||
|
{
|
||||||
|
return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
|
||||||
|
}
|
||||||
|
|
||||||
bool StringView::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
|
bool StringView::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
|
||||||
{
|
{
|
||||||
return StringUtils::matches(*this, mask, case_sensitivity);
|
return StringUtils::matches(*this, mask, case_sensitivity);
|
||||||
|
|
|
@ -87,6 +87,7 @@ public:
|
||||||
bool starts_with(char) const;
|
bool starts_with(char) const;
|
||||||
bool ends_with(char) const;
|
bool ends_with(char) const;
|
||||||
bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
|
bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
|
||||||
|
bool matches(const StringView& mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
|
||||||
bool contains(char) const;
|
bool contains(char) const;
|
||||||
bool contains(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
|
bool contains(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
|
||||||
bool equals_ignoring_case(const StringView& other) const;
|
bool equals_ignoring_case(const StringView& other) const;
|
||||||
|
|
|
@ -67,6 +67,25 @@ TEST_CASE(matches_case_insensitive)
|
||||||
EXPECT(!AK::StringUtils::matches("acdcb", "a*c?b"));
|
EXPECT(!AK::StringUtils::matches("acdcb", "a*c?b"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that StringUtils::matches() fills the optional span vector with the
// { start, length } regions it records for `*` and `?` in the mask.
TEST_CASE(matches_with_positions)
|
||||||
|
{
|
||||||
|
Vector<AK::MaskSpan> spans;
|
||||||
|
// "a*" against "abbb": the `*` is expected to cover "bbb", i.e. offset 1, length 3.
EXPECT(AK::StringUtils::matches("abbb", "a*", CaseSensitivity::CaseSensitive, &spans));
|
||||||
|
EXPECT(spans == Vector<AK::MaskSpan>({ { 1, 3 } }));
|
||||||
|
|
||||||
|
// The vector is cleared between cases because the matcher appends to it.
spans.clear();
|
||||||
|
// "?*" against "abbb": `?` covers offset 0 (length 1), then `*` covers offset 1 (length 3).
EXPECT(AK::StringUtils::matches("abbb", "?*", CaseSensitivity::CaseSensitive, &spans));
|
||||||
|
EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 0, 1 }, { 1, 3 } }));
|
||||||
|
|
||||||
|
spans.clear();
|
||||||
|
// "a*c?b" against "acdcxb": `*` covers "cd" (offset 1, length 2) and `?` covers "x" (offset 4, length 1).
EXPECT(AK::StringUtils::matches("acdcxb", "a*c?b", CaseSensitivity::CaseSensitive, &spans));
|
||||||
|
EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 1, 2 }, { 4, 1 } }));
|
||||||
|
|
||||||
|
spans.clear();
|
||||||
|
// Case-insensitive match: "A*" against "aaaa" is expected to report the `*` at offset 1, length 3.
EXPECT(AK::StringUtils::matches("aaaa", "A*", CaseSensitivity::CaseInsensitive, &spans));
|
||||||
|
EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 1, 3 } }));
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE(convert_to_int)
|
TEST_CASE(convert_to_int)
|
||||||
{
|
{
|
||||||
auto value = AK::StringUtils::convert_to_int(StringView());
|
auto value = AK::StringUtils::convert_to_int(StringView());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue