mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 17:02:45 +00:00 
			
		
		
		
	Kernel+LibC: Implement 'memmem'
This commit adds an implementation of memmem, using the Bitap text search algorithm for needles smaller than 32 bytes, and a naive loop search for longer needles.
This commit is contained in:
		
							parent
							
								
									ad35436786
								
							
						
					
					
						commit
						1ad51325ad
					
				
					 5 changed files with 178 additions and 0 deletions
				
			
		|  | @ -39,6 +39,34 @@ String copy_string_from_user(const char* user_str, size_t user_str_size) | ||||||
|     return String(user_str, length); |     return String(user_str, length); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
namespace {

// Exact substring search using the Bitap (shift-or) algorithm.
//
// Returns a pointer to the first occurrence of the `needle_length` bytes at
// `needle` inside the `haystack_length` bytes at `haystack`, or nullptr when
// there is no occurrence.
//
// `needle_length` must be below 32 (asserted). The caller is expected to have
// handled the empty-needle case already; memmem() does so before calling this.
static const void* bitap_bitwise(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length)
{
    ASSERT(needle_length < 32);

    const auto* haystack_bytes = static_cast<const u8*>(haystack);
    const auto* needle_bytes = static_cast<const u8*>(needle);

    // Search state: after a haystack byte has been processed, bit k being
    // clear means the last k haystack bytes equal the first k needle bytes.
    // Bit 0 starts out clear because the empty prefix always matches.
    u64 lookup = 0xfffffffe;

    // One mask per possible byte value (256 entries).
    constexpr size_t mask_length = static_cast<size_t>(static_cast<u8>(-1)) + 1;
    u64 needle_mask[mask_length];

    for (size_t i = 0; i < mask_length; ++i)
        needle_mask[i] = 0xffffffff;

    // Clear bit i in the mask of the byte found at needle position i.
    // The shift is done on a u64 so that it never happens in (signed) int.
    for (size_t i = 0; i < needle_length; ++i)
        needle_mask[needle_bytes[i]] &= ~(static_cast<u64>(1) << i);

    // A clear bit at position `needle_length` signals a complete match.
    // This must be a single u64 bit: computing it as an int yields INT_MIN for
    // a 31-byte needle, which sign-extends into the upper half of the word and
    // makes the test below unsatisfiable.
    const u64 match_bit = static_cast<u64>(1) << needle_length;

    for (size_t i = 0; i < haystack_length; ++i) {
        lookup |= needle_mask[haystack_bytes[i]];
        lookup <<= 1;

        // `i` is the index of the last matched byte; step back to the first.
        if (!(lookup & match_bit))
            return haystack_bytes + i - needle_length + 1;
    }

    return nullptr;
}

}
|  | 
 | ||||||
| extern "C" { | extern "C" { | ||||||
| 
 | 
 | ||||||
| void copy_to_user(void* dest_ptr, const void* src_ptr, size_t n) | void copy_to_user(void* dest_ptr, const void* src_ptr, size_t n) | ||||||
|  | @ -91,6 +119,31 @@ void* memmove(void* dest, const void* src, size_t n) | ||||||
|     return dest; |     return dest; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | const void* memmem(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length) | ||||||
|  | { | ||||||
|  |     if (needle_length == 0) | ||||||
|  |         return haystack; | ||||||
|  | 
 | ||||||
|  |     if (haystack_length < needle_length) | ||||||
|  |         return nullptr; | ||||||
|  | 
 | ||||||
|  |     if (haystack_length == needle_length) | ||||||
|  |         return memcmp(haystack, needle, haystack_length) == 0 ? haystack : nullptr; | ||||||
|  | 
 | ||||||
|  |     if (needle_length < 32) | ||||||
|  |         return bitap_bitwise(haystack, haystack_length, needle, needle_length); | ||||||
|  | 
 | ||||||
|  |     // Fallback to a slower search.
 | ||||||
|  |     auto length_diff = haystack_length - needle_length; | ||||||
|  |     for (size_t i = 0; i < length_diff; ++i) { | ||||||
|  |         const auto* start = ((const u8*)haystack) + i; | ||||||
|  |         if (memcmp(start, needle, needle_length) == 0) | ||||||
|  |             return start; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return nullptr; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| char* strcpy(char* dest, const char* src) | char* strcpy(char* dest, const char* src) | ||||||
| { | { | ||||||
|     auto* dest_ptr = dest; |     auto* dest_ptr = dest; | ||||||
|  |  | ||||||
|  | @ -54,6 +54,7 @@ char* strdup(const char*); | ||||||
| int memcmp(const void*, const void*, size_t); | int memcmp(const void*, const void*, size_t); | ||||||
| char* strrchr(const char* str, int ch); | char* strrchr(const char* str, int ch); | ||||||
| void* memmove(void* dest, const void* src, size_t n); | void* memmove(void* dest, const void* src, size_t n); | ||||||
|  | const void* memmem(const void* haystack, size_t, const void* needle, size_t); | ||||||
| 
 | 
 | ||||||
| inline u16 ntohs(u16 w) { return (w & 0xff) << 8 | ((w >> 8) & 0xff); } | inline u16 ntohs(u16 w) { return (w & 0xff) << 8 | ((w >> 8) & 0xff); } | ||||||
| inline u16 htons(u16 w) { return (w & 0xff) << 8 | ((w >> 8) & 0xff); } | inline u16 htons(u16 w) { return (w & 0xff) << 8 | ((w >> 8) & 0xff); } | ||||||
|  |  | ||||||
|  | @ -189,6 +189,59 @@ void* memmove(void* dest, const void* src, size_t n) | ||||||
|     return dest; |     return dest; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
namespace {

// Bitap (shift-or) exact search for short needles.
//
// Scans the `haystack_length` bytes at `haystack` for the first occurrence of
// the `needle_length` bytes at `needle` and returns a pointer to it, or
// nullptr if the needle does not occur.
//
// Precondition (asserted): `needle_length` < 32. The empty needle is expected
// to be handled by the caller, as memmem() does.
static const void* bitap_bitwise(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length)
{
    ASSERT(needle_length < 32);

    const auto* haystack_bytes = static_cast<const u8*>(haystack);
    const auto* needle_bytes = static_cast<const u8*>(needle);

    // Running state: once a haystack byte has been consumed, a clear bit k
    // means the k most recent haystack bytes match the needle's first k bytes.
    // Bit 0 is initially clear since the empty prefix trivially matches.
    u64 lookup = 0xfffffffe;

    // 256 masks, one for every value a byte can take.
    constexpr size_t mask_length = static_cast<size_t>(static_cast<u8>(-1)) + 1;
    u64 needle_mask[mask_length];

    for (size_t i = 0; i < mask_length; ++i)
        needle_mask[i] = 0xffffffff;

    // For each needle position i, clear bit i in the mask of the byte stored
    // there. Shifting a u64 one keeps the arithmetic out of signed int.
    for (size_t i = 0; i < needle_length; ++i)
        needle_mask[needle_bytes[i]] &= ~(static_cast<u64>(1) << i);

    // Bit `needle_length` being clear means the whole needle has matched.
    // It has to be one u64 bit: as an int expression a 31-byte needle produces
    // INT_MIN, whose sign extension sets bits 32..63 of the mask and prevents
    // any match from ever being reported.
    const u64 match_bit = static_cast<u64>(1) << needle_length;

    for (size_t i = 0; i < haystack_length; ++i) {
        lookup |= needle_mask[haystack_bytes[i]];
        lookup <<= 1;

        // The match ends at index `i`; return where it begins.
        if (!(lookup & match_bit))
            return haystack_bytes + i - needle_length + 1;
    }

    return nullptr;
}

}
|  | 
 | ||||||
|  | const void* memmem(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length) | ||||||
|  | { | ||||||
|  |     if (needle_length == 0) | ||||||
|  |         return haystack; | ||||||
|  | 
 | ||||||
|  |     if (haystack_length < needle_length) | ||||||
|  |         return nullptr; | ||||||
|  | 
 | ||||||
|  |     if (haystack_length == needle_length) | ||||||
|  |         return memcmp(haystack, needle, haystack_length) == 0 ? haystack : nullptr; | ||||||
|  | 
 | ||||||
|  |     if (needle_length < 32) | ||||||
|  |         return bitap_bitwise(haystack, haystack_length, needle, needle_length); | ||||||
|  | 
 | ||||||
|  |     // Fallback to a slower search.
 | ||||||
|  |     auto length_diff = haystack_length - needle_length; | ||||||
|  |     for (size_t i = 0; i < length_diff; ++i) { | ||||||
|  |         const auto* start = ((const u8*)haystack) + i; | ||||||
|  |         if (memcmp(start, needle, needle_length) == 0) | ||||||
|  |             return start; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return nullptr; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| char* strcpy(char* dest, const char* src) | char* strcpy(char* dest, const char* src) | ||||||
| { | { | ||||||
|     char* originalDest = dest; |     char* originalDest = dest; | ||||||
|  |  | ||||||
|  | @ -41,6 +41,7 @@ int memcmp(const void*, const void*, size_t); | ||||||
| void* memcpy(void*, const void*, size_t); | void* memcpy(void*, const void*, size_t); | ||||||
| void* memmove(void*, const void*, size_t); | void* memmove(void*, const void*, size_t); | ||||||
| void* memchr(const void*, int c, size_t); | void* memchr(const void*, int c, size_t); | ||||||
|  | const void* memmem(const void* haystack, size_t, const void* needle, size_t); | ||||||
| void bzero(void*, size_t); | void bzero(void*, size_t); | ||||||
| void bcopy(const void*, void*, size_t); | void bcopy(const void*, void*, size_t); | ||||||
| void* memset(void*, int, size_t); | void* memset(void*, int, size_t); | ||||||
|  |  | ||||||
							
								
								
									
										70
									
								
								Tests/LibC/memmem-tests.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								Tests/LibC/memmem-tests.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,70 @@ | ||||||
|  | /*
 | ||||||
|  |  * Copyright (c) 2020, Ali Mohammad Pur <ali.mpfard@gmail.com> | ||||||
|  |  * All rights reserved. | ||||||
|  |  * | ||||||
|  |  * Redistribution and use in source and binary forms, with or without | ||||||
|  |  * modification, are permitted provided that the following conditions are met: | ||||||
|  |  * | ||||||
|  |  * 1. Redistributions of source code must retain the above copyright notice, this | ||||||
|  |  *    list of conditions and the following disclaimer. | ||||||
|  |  * | ||||||
|  |  * 2. Redistributions in binary form must reproduce the above copyright notice, | ||||||
|  |  *    this list of conditions and the following disclaimer in the documentation | ||||||
|  |  *    and/or other materials provided with the distribution. | ||||||
|  |  * | ||||||
|  |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||||
|  |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||||
|  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||||||
|  |  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||||||
|  |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||||
|  |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||||||
|  |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||||||
|  |  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||||||
|  |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include <AK/Types.h> | ||||||
|  | #include <assert.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <string.h> | ||||||
|  | #include <unistd.h> | ||||||
|  | 
 | ||||||
// One memmem() scenario: the two buffers handed to memmem() together with
// their lengths, and the result that is expected.
struct TestCase {
    const u8* haystack;
    size_t haystack_length;
    const u8* needle;
    size_t needle_length;
    // Offset into `haystack` at which the match is expected. A negative value
    // means no match is expected; main() then expects memmem() to return
    // nullptr.
    ssize_t matching_offset { -1 };
};
|  | 
 | ||||||
|  | const static TestCase g_test_cases[] = { | ||||||
|  |     { (const u8*) {}, 0u, (const u8*) {}, 0u, 0 }, | ||||||
|  |     { (const u8[]) { 1, 2, 3 }, 3u, (const u8[]) { 1, 2, 3 }, 3u, 0 }, | ||||||
|  |     { (const u8[]) { 1, 2, 4 }, 3u, (const u8[]) { 1, 2, 3 }, 3u, -1 }, | ||||||
|  |     { (const u8*)"abcdef", 6u, (const u8[]) {}, 0u, 0 }, | ||||||
|  |     { (const u8*)"abcdef", 6u, (const u8*)"de", 2u, 3 }, | ||||||
|  |     { (const u8[]) { 0, 1, 2, 5, 2, 5 }, 6u, (const u8[]) { 1 }, 1u, 1 }, | ||||||
|  |     { (const u8[]) { 0, 1, 2, 5, 2, 5 }, 6u, (const u8[]) { 1, 2 }, 2u, 1 }, | ||||||
|  |     { (const u8[]) { 0, 1, 1, 2 }, 4u, (const u8[]) { 1, 5 }, 2u, -1 }, | ||||||
|  |     { (const u8[64]) { 0 }, 64u, (const u8[33]) { 0 }, 33u, 0 }, | ||||||
|  |     { (const u8[64]) { 0, 1, 1, 2 }, 64u, (const u8[33]) { 1, 1 }, 2u, 1 }, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | int main() | ||||||
|  | { | ||||||
|  |     bool failed = false; | ||||||
|  |     size_t i = 0; | ||||||
|  |     for (const auto& test_case : g_test_cases) { | ||||||
|  |         auto expected = test_case.matching_offset >= 0 ? test_case.haystack + test_case.matching_offset : nullptr; | ||||||
|  |         auto result = memmem(test_case.haystack, test_case.haystack_length, test_case.needle, test_case.needle_length); | ||||||
|  |         if (result != expected) { | ||||||
|  |             failed = true; | ||||||
|  |             fprintf(stderr, "Test %zu FAILED! expected %p, got %p\n", i, expected, result); | ||||||
|  |         } | ||||||
|  |         ++i; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return failed ? 1 : 0; | ||||||
|  | } | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 AnotherTest
						AnotherTest