diff --git a/AK/MemMem.h b/AK/MemMem.h
index cc2edef4d3..38b7f8527e 100644
--- a/AK/MemMem.h
+++ b/AK/MemMem.h
@@ -26,8 +26,11 @@
 
 #pragma once
 
+#include <AK/Array.h>
 #include <AK/Assertions.h>
+#include <AK/Span.h>
 #include <AK/Types.h>
+#include <AK/Vector.h>
 
 namespace AK {
 
@@ -59,6 +62,78 @@ const static void* bitap_bitwise(const void* haystack, size_t haystack_length, c
 }
 }
 
+// Knuth-Morris-Pratt search over a haystack that is stored as a sequence of chunks.
+// [haystack_begin, haystack_end) iterates the chunks; each chunk must offer data(), size()
+// and operator[]. A match may straddle chunk boundaries.
+// Returns the offset of the first occurrence of `needle`, counted from the start of the
+// first chunk, or an empty Optional if there is none. An empty needle matches at offset 0.
+template<typename HaystackIterT>
+static inline Optional<size_t> memmem(const HaystackIterT& haystack_begin, const HaystackIterT& haystack_end, Span<const u8> needle) requires(requires { (*haystack_begin).data(); (*haystack_begin).size(); })
+{
+    // Without this guard the table setup below would write table[0] on a zero-sized table.
+    if (needle.size() == 0)
+        return (size_t)0;
+
+    // Builds the KMP partial-match table: table[i] is the needle index to resume from after
+    // a mismatch at needle[i]; -1 means "advance the haystack and restart the needle".
+    auto prepare_kmp_partial_table = [&] {
+        Vector<int, 64> table;
+        table.resize(needle.size());
+
+        size_t position = 1;
+        int candidate = 0;
+
+        table[0] = -1;
+        while (position < needle.size()) {
+            if (needle[position] == needle[candidate]) {
+                table[position] = table[candidate];
+            } else {
+                table[position] = candidate;
+                do {
+                    candidate = table[candidate];
+                } while (candidate >= 0 && needle[candidate] != needle[position]);
+            }
+            ++position;
+            ++candidate;
+        }
+        return table;
+    };
+
+    auto table = prepare_kmp_partial_table();
+    size_t total_haystack_index = 0;
+    size_t current_haystack_index = 0;
+    int needle_index = 0;
+    auto haystack_it = haystack_begin;
+
+    while (haystack_it != haystack_end) {
+        auto&& chunk = *haystack_it;
+        // Chunk exhausted (or empty): continue in the next one, keeping the match state.
+        if (current_haystack_index >= chunk.size()) {
+            current_haystack_index = 0;
+            ++haystack_it;
+            continue;
+        }
+        if (needle[needle_index] == chunk[current_haystack_index]) {
+            ++needle_index;
+            ++current_haystack_index;
+            ++total_haystack_index;
+            // total_haystack_index is now one past the last matched byte.
+            if ((size_t)needle_index == needle.size())
+                return total_haystack_index - needle_index;
+            continue;
+        }
+        // Mismatch: fall back inside the needle; the haystack position only moves on -1.
+        needle_index = table[needle_index];
+        if (needle_index < 0) {
+            ++needle_index;
+            ++current_haystack_index;
+            ++total_haystack_index;
+        }
+    }
+
+    return {};
+}
+
 static inline const void* memmem(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length)
 {
     if (needle_length == 0)
@@ -73,15 +148,14 @@ static inline const void* memmem(const void* haystack, size_t haystack_length, c
     if (needle_length < 32)
         return bitap_bitwise(haystack, haystack_length, needle, needle_length);
 
-    // Fallback to a slower search.
-    auto length_diff = haystack_length - needle_length;
-    for (size_t i = 0; i < length_diff; ++i) {
-        const auto* start = ((const u8*)haystack) + i;
-        if (__builtin_memcmp(start, needle, needle_length) == 0)
-            return start;
-    }
+    // Fallback to KMP.
+    Array<Span<const u8>, 1> spans { Span<const u8> { (const u8*)haystack, haystack_length } };
+    auto result = memmem(spans.begin(), spans.end(), { (const u8*)needle, needle_length });
 
-    return nullptr;
+    if (result.has_value())
+        return (const u8*)haystack + result.value();
+
+    return {};
 }
 
 }
diff --git a/AK/Tests/TestMemMem.cpp b/AK/Tests/TestMemMem.cpp
new file mode 100644
index 0000000000..4f7b383412
--- /dev/null
+++ b/AK/Tests/TestMemMem.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2020, the SerenityOS developers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <AK/TestSuite.h>
+
+#include <AK/MemMem.h>
+
+// Exercises the pointer-based overload with needles shorter than 32 bytes.
+TEST_CASE(bitap)
+{
+    Array<u8, 8> haystack { 1, 0, 1, 2, 3, 4, 5, 0 };
+    Array<u8, 4> needle_0 { 2, 3, 4, 5 };
+    Array<u8, 4> needle_1 { 1, 2, 3, 4 };
+    Array<u8, 4> needle_2 { 3, 4, 5, 0 };
+    Array<u8, 4> needle_3 { 3, 4, 5, 6 };
+
+    auto result_0 = AK::memmem(haystack.data(), haystack.size(), needle_0.data(), needle_0.size());
+    auto result_1 = AK::memmem(haystack.data(), haystack.size(), needle_1.data(), needle_1.size());
+    auto result_2 = AK::memmem(haystack.data(), haystack.size(), needle_2.data(), needle_2.size());
+    auto result_3 = AK::memmem(haystack.data(), haystack.size(), needle_3.data(), needle_3.size());
+
+    EXPECT_EQ(result_0, &haystack[3]);
+    EXPECT_EQ(result_1, &haystack[2]);
+    EXPECT_EQ(result_2, &haystack[4]);
+    EXPECT_EQ(result_3, nullptr);
+}
+
+// Chunked overload with the whole haystack in a single chunk.
+TEST_CASE(kmp_one_chunk)
+{
+    Array<u8, 8> haystack { 1, 0, 1, 2, 3, 4, 5, 0 };
+    Array<Array<u8, 8>, 1> haystack_arr { haystack };
+    Array<u8, 4> needle_0 { 2, 3, 4, 5 };
+    Array<u8, 4> needle_1 { 1, 2, 3, 4 };
+    Array<u8, 4> needle_2 { 3, 4, 5, 0 };
+    Array<u8, 4> needle_3 { 3, 4, 5, 6 };
+
+    auto result_0 = AK::memmem(haystack_arr.begin(), haystack_arr.end(), needle_0);
+    auto result_1 = AK::memmem(haystack_arr.begin(), haystack_arr.end(), needle_1);
+    auto result_2 = AK::memmem(haystack_arr.begin(), haystack_arr.end(), needle_2);
+    auto result_3 = AK::memmem(haystack_arr.begin(), haystack_arr.end(), needle_3);
+
+    EXPECT_EQ(result_0.value_or(9), 3u);
+    EXPECT_EQ(result_1.value_or(9), 2u);
+    EXPECT_EQ(result_2.value_or(9), 4u);
+    EXPECT(!result_3.has_value());
+}
+
+// needle_0 and needle_1 begin in the first chunk and end in the second one.
+TEST_CASE(kmp_two_chunks)
+{
+    Array<u8, 4> haystack_first_half { 1, 0, 1, 2 }, haystack_second_half { 3, 4, 5, 0 };
+    Array<Array<u8, 4>, 2> haystack { haystack_first_half, haystack_second_half };
+    Array<u8, 4> needle_0 { 2, 3, 4, 5 };
+    Array<u8, 4> needle_1 { 1, 2, 3, 4 };
+    Array<u8, 4> needle_2 { 3, 4, 5, 0 };
+    Array<u8, 4> needle_3 { 3, 4, 5, 6 };
+
+    auto result_0 = AK::memmem(haystack.begin(), haystack.end(), needle_0);
+    auto result_1 = AK::memmem(haystack.begin(), haystack.end(), needle_1);
+    auto result_2 = AK::memmem(haystack.begin(), haystack.end(), needle_2);
+    auto result_3 = AK::memmem(haystack.begin(), haystack.end(), needle_3);
+
+    EXPECT_EQ(result_0.value_or(9), 3u);
+    EXPECT_EQ(result_1.value_or(9), 2u);
+    EXPECT_EQ(result_2.value_or(9), 4u);
+    EXPECT(!result_3.has_value());
+}
+
+// An empty needle must match at offset 0 instead of indexing the empty KMP table.
+TEST_CASE(kmp_empty_needle)
+{
+    Array<u8, 4> chunk { 1, 2, 3, 4 };
+    Array<Array<u8, 4>, 1> haystack { chunk };
+    Span<const u8> empty_needle { chunk.data(), 0 };
+
+    auto result = AK::memmem(haystack.begin(), haystack.end(), empty_needle);
+
+    EXPECT_EQ(result.value_or(9), 0u);
+}
+
+TEST_MAIN(MemMem)