mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 09:52:44 +00:00 
			
		
		
		
	Kernel+LibC: Implement 'memmem'
This commit adds an implementation of memmem, using the Bitap text search algorithm for needles smaller than 32 bytes, and a naive loop search for longer needles.
This commit is contained in:
		
							parent
							
								
									ad35436786
								
							
						
					
					
						commit
						1ad51325ad
					
				
					 5 changed files with 178 additions and 0 deletions
				
			
		|  | @ -39,6 +39,34 @@ String copy_string_from_user(const char* user_str, size_t user_str_size) | |||
|     return String(user_str, length); | ||||
| } | ||||
| 
 | ||||
namespace {
// Exact substring search using the Bitap (shift-or) algorithm.
//
// Returns a pointer to the first occurrence of `needle` inside `haystack`, or
// nullptr if there is none. Only needles shorter than 32 bytes are supported
// (enforced by the ASSERT). memmem() filters out the empty needle before it
// dispatches here, so `needle_length` is expected to be non-zero.
//
// All state is kept in full-width u64 values and every shift is performed on a
// u64 operand. Shifting a plain `int` literal by 31 overflows the signed type
// and yields a mask that can never test clear, which made 31-byte needles
// unfindable.
const static void* bitap_bitwise(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length)
{
    ASSERT(needle_length < 32);

    const auto* haystack_bytes = (const u8*)haystack;
    const auto* needle_bytes = (const u8*)needle;

    // One mask per possible byte value; bit i is cleared when needle[i] equals that byte.
    constexpr size_t mask_length = (size_t)((u8)-1) + 1;
    u64 needle_mask[mask_length];

    for (size_t i = 0; i < mask_length; ++i)
        needle_mask[i] = ~(u64)0;

    for (size_t i = 0; i < needle_length; ++i)
        needle_mask[needle_bytes[i]] &= ~((u64)1 << i);

    // After each step, bit k of `lookup` is clear iff the first k needle bytes
    // match the haystack bytes ending at the current position.
    const u64 match_bit = (u64)1 << needle_length;
    u64 lookup = ~(u64)1;

    for (size_t i = 0; i < haystack_length; ++i) {
        lookup |= needle_mask[haystack_bytes[i]];
        lookup <<= 1;

        if (!(lookup & match_bit))
            return haystack_bytes + i - needle_length + 1;
    }

    return nullptr;
}
}
| 
 | ||||
| extern "C" { | ||||
| 
 | ||||
| void copy_to_user(void* dest_ptr, const void* src_ptr, size_t n) | ||||
|  | @ -91,6 +119,31 @@ void* memmove(void* dest, const void* src, size_t n) | |||
|     return dest; | ||||
| } | ||||
| 
 | ||||
| const void* memmem(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length) | ||||
| { | ||||
|     if (needle_length == 0) | ||||
|         return haystack; | ||||
| 
 | ||||
|     if (haystack_length < needle_length) | ||||
|         return nullptr; | ||||
| 
 | ||||
|     if (haystack_length == needle_length) | ||||
|         return memcmp(haystack, needle, haystack_length) == 0 ? haystack : nullptr; | ||||
| 
 | ||||
|     if (needle_length < 32) | ||||
|         return bitap_bitwise(haystack, haystack_length, needle, needle_length); | ||||
| 
 | ||||
|     // Fallback to a slower search.
 | ||||
|     auto length_diff = haystack_length - needle_length; | ||||
|     for (size_t i = 0; i < length_diff; ++i) { | ||||
|         const auto* start = ((const u8*)haystack) + i; | ||||
|         if (memcmp(start, needle, needle_length) == 0) | ||||
|             return start; | ||||
|     } | ||||
| 
 | ||||
|     return nullptr; | ||||
| } | ||||
| 
 | ||||
| char* strcpy(char* dest, const char* src) | ||||
| { | ||||
|     auto* dest_ptr = dest; | ||||
|  |  | |||
|  | @ -54,6 +54,7 @@ char* strdup(const char*); | |||
| int memcmp(const void*, const void*, size_t); | ||||
| char* strrchr(const char* str, int ch); | ||||
| void* memmove(void* dest, const void* src, size_t n); | ||||
| const void* memmem(const void* haystack, size_t, const void* needle, size_t); | ||||
| 
 | ||||
| inline u16 ntohs(u16 w) { return (w & 0xff) << 8 | ((w >> 8) & 0xff); } | ||||
| inline u16 htons(u16 w) { return (w & 0xff) << 8 | ((w >> 8) & 0xff); } | ||||
|  |  | |||
|  | @ -189,6 +189,59 @@ void* memmove(void* dest, const void* src, size_t n) | |||
|     return dest; | ||||
| } | ||||
| 
 | ||||
namespace {
// Exact substring search using the Bitap (shift-or) algorithm.
//
// Returns a pointer to the first occurrence of `needle` inside `haystack`, or
// nullptr if there is none. Only needles shorter than 32 bytes are supported
// (enforced by the ASSERT). memmem() filters out the empty needle before it
// dispatches here, so `needle_length` is expected to be non-zero.
//
// All state is kept in full-width u64 values and every shift is performed on a
// u64 operand. Shifting a plain `int` literal by 31 overflows the signed type
// and yields a mask that can never test clear, which made 31-byte needles
// unfindable.
const static void* bitap_bitwise(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length)
{
    ASSERT(needle_length < 32);

    const auto* haystack_bytes = (const u8*)haystack;
    const auto* needle_bytes = (const u8*)needle;

    // One mask per possible byte value; bit i is cleared when needle[i] equals that byte.
    constexpr size_t mask_length = (size_t)((u8)-1) + 1;
    u64 needle_mask[mask_length];

    for (size_t i = 0; i < mask_length; ++i)
        needle_mask[i] = ~(u64)0;

    for (size_t i = 0; i < needle_length; ++i)
        needle_mask[needle_bytes[i]] &= ~((u64)1 << i);

    // After each step, bit k of `lookup` is clear iff the first k needle bytes
    // match the haystack bytes ending at the current position.
    const u64 match_bit = (u64)1 << needle_length;
    u64 lookup = ~(u64)1;

    for (size_t i = 0; i < haystack_length; ++i) {
        lookup |= needle_mask[haystack_bytes[i]];
        lookup <<= 1;

        if (!(lookup & match_bit))
            return haystack_bytes + i - needle_length + 1;
    }

    return nullptr;
}
}
| 
 | ||||
| const void* memmem(const void* haystack, size_t haystack_length, const void* needle, size_t needle_length) | ||||
| { | ||||
|     if (needle_length == 0) | ||||
|         return haystack; | ||||
| 
 | ||||
|     if (haystack_length < needle_length) | ||||
|         return nullptr; | ||||
| 
 | ||||
|     if (haystack_length == needle_length) | ||||
|         return memcmp(haystack, needle, haystack_length) == 0 ? haystack : nullptr; | ||||
| 
 | ||||
|     if (needle_length < 32) | ||||
|         return bitap_bitwise(haystack, haystack_length, needle, needle_length); | ||||
| 
 | ||||
|     // Fallback to a slower search.
 | ||||
|     auto length_diff = haystack_length - needle_length; | ||||
|     for (size_t i = 0; i < length_diff; ++i) { | ||||
|         const auto* start = ((const u8*)haystack) + i; | ||||
|         if (memcmp(start, needle, needle_length) == 0) | ||||
|             return start; | ||||
|     } | ||||
| 
 | ||||
|     return nullptr; | ||||
| } | ||||
| 
 | ||||
| char* strcpy(char* dest, const char* src) | ||||
| { | ||||
|     char* originalDest = dest; | ||||
|  |  | |||
|  | @ -41,6 +41,7 @@ int memcmp(const void*, const void*, size_t); | |||
| void* memcpy(void*, const void*, size_t); | ||||
| void* memmove(void*, const void*, size_t); | ||||
| void* memchr(const void*, int c, size_t); | ||||
| const void* memmem(const void* haystack, size_t, const void* needle, size_t); | ||||
| void bzero(void*, size_t); | ||||
| void bcopy(const void*, void*, size_t); | ||||
| void* memset(void*, int, size_t); | ||||
|  |  | |||
							
								
								
									
										70
									
								
								Tests/LibC/memmem-tests.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								Tests/LibC/memmem-tests.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,70 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2020, Ali Mohammad Pur <ali.mpfard@gmail.com> | ||||
|  * All rights reserved. | ||||
|  * | ||||
|  * Redistribution and use in source and binary forms, with or without | ||||
|  * modification, are permitted provided that the following conditions are met: | ||||
|  * | ||||
|  * 1. Redistributions of source code must retain the above copyright notice, this | ||||
|  *    list of conditions and the following disclaimer. | ||||
|  * | ||||
|  * 2. Redistributions in binary form must reproduce the above copyright notice, | ||||
|  *    this list of conditions and the following disclaimer in the documentation | ||||
|  *    and/or other materials provided with the distribution. | ||||
|  * | ||||
|  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||
|  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
|  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||||
|  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||||
|  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||
|  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||||
|  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||||
|  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||||
|  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||
|  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
|  */ | ||||
| 
 | ||||
| #include <AK/Types.h> | ||||
| #include <assert.h> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <unistd.h> | ||||
| 
 | ||||
| struct TestCase { | ||||
|     const u8* haystack; | ||||
|     size_t haystack_length; | ||||
|     const u8* needle; | ||||
|     size_t needle_length; | ||||
|     ssize_t matching_offset { -1 }; | ||||
| }; | ||||
| 
 | ||||
// Table of memmem() scenarios exercised by main(). Each entry is
//   { haystack, haystack_length, needle, needle_length, matching_offset }
// where matching_offset is the expected offset of the match inside the
// haystack, or -1 when memmem() is expected to return nullptr.
// The byte arrays are written as compound literals.
| const static TestCase g_test_cases[] = { | ||||
// Zero-length haystack and zero-length needle.
|     { (const u8*) {}, 0u, (const u8*) {}, 0u, 0 }, | ||||
// Haystack and needle of equal length: identical, then differing in the last byte.
|     { (const u8[]) { 1, 2, 3 }, 3u, (const u8[]) { 1, 2, 3 }, 3u, 0 }, | ||||
|     { (const u8[]) { 1, 2, 4 }, 3u, (const u8[]) { 1, 2, 3 }, 3u, -1 }, | ||||
// Zero-length needle against a non-empty haystack.
|     { (const u8*)"abcdef", 6u, (const u8[]) {}, 0u, 0 }, | ||||
// Short needles (fewer than 32 bytes).
|     { (const u8*)"abcdef", 6u, (const u8*)"de", 2u, 3 }, | ||||
|     { (const u8[]) { 0, 1, 2, 5, 2, 5 }, 6u, (const u8[]) { 1 }, 1u, 1 }, | ||||
|     { (const u8[]) { 0, 1, 2, 5, 2, 5 }, 6u, (const u8[]) { 1, 2 }, 2u, 1 }, | ||||
|     { (const u8[]) { 0, 1, 1, 2 }, 4u, (const u8[]) { 1, 5 }, 2u, -1 }, | ||||
// 33-byte needle: at least 32 bytes, so memmem() uses its fallback search.
|     { (const u8[64]) { 0 }, 64u, (const u8[33]) { 0 }, 33u, 0 }, | ||||
// The needle array holds 33 bytes, but only the first 2 are passed as the needle.
|     { (const u8[64]) { 0, 1, 1, 2 }, 64u, (const u8[33]) { 1, 1 }, 2u, 1 }, | ||||
| }; | ||||
| 
 | ||||
| int main() | ||||
| { | ||||
|     bool failed = false; | ||||
|     size_t i = 0; | ||||
|     for (const auto& test_case : g_test_cases) { | ||||
|         auto expected = test_case.matching_offset >= 0 ? test_case.haystack + test_case.matching_offset : nullptr; | ||||
|         auto result = memmem(test_case.haystack, test_case.haystack_length, test_case.needle, test_case.needle_length); | ||||
|         if (result != expected) { | ||||
|             failed = true; | ||||
|             fprintf(stderr, "Test %zu FAILED! expected %p, got %p\n", i, expected, result); | ||||
|         } | ||||
|         ++i; | ||||
|     } | ||||
| 
 | ||||
|     return failed ? 1 : 0; | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 AnotherTest
						AnotherTest