1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 09:14:58 +00:00

LibC: Implement a faster memset routine for x86-64 in assembly

This commit addresses the following shortcomings of our current, simple
and elegant memset function:
- REP STOSB/STOSQ has considerable startup overhead, which makes it
  impractical to use for smaller sizes.
- Up until very recently, AMD CPUs didn't have support for "Enhanced REP
  MOVSB/STOSB", so REP STOSB/STOSQ performed pretty poorly on them.

With this commit applied, I could measure a ~5% decrease in `test-js`'s
runtime when I used qemu's TCG backend. The implementation is based on
the following article from Microsoft:

https://msrc-blog.microsoft.com/2021/01/11/building-faster-amd64-memset-routines

Two versions of the routine are implemented: one that uses the ERMS
extension mentioned above, and one that performs plain SSE stores. The
version appropriate for the CPU is selected at load time using an IFUNC.
This commit is contained in:
Daniel Bertalan 2022-03-24 16:41:18 +01:00 committed by Andreas Kling
parent 484f70fb43
commit bcf124c07d
5 changed files with 262 additions and 10 deletions

View file

@ -137,7 +137,10 @@ void* memcpy(void* dest_ptr, void const* src_ptr, size_t n)
return original_dest;
}
#if ARCH(I386)
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/memset.html
//
// For x86-64, an optimized ASM implementation is found in ./arch/x86_64/memset.S
void* memset(void* dest_ptr, int c, size_t n)
{
size_t dest = (size_t)dest_ptr;
@ -145,19 +148,11 @@ void* memset(void* dest_ptr, int c, size_t n)
if (!(dest & 0x3) && n >= 12) {
size_t size_ts = n / sizeof(size_t);
size_t expanded_c = explode_byte((u8)c);
#if ARCH(I386)
asm volatile(
"rep stosl\n"
: "=D"(dest)
: "D"(dest), "c"(size_ts), "a"(expanded_c)
: "memory");
#else
asm volatile(
"rep stosq\n"
: "=D"(dest)
: "D"(dest), "c"(size_ts), "a"(expanded_c)
: "memory");
#endif
n -= size_ts * sizeof(size_t);
if (n == 0)
return dest_ptr;
@ -169,6 +164,7 @@ void* memset(void* dest_ptr, int c, size_t n)
: "memory");
return dest_ptr;
}
#endif
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/memmove.html
void* memmove(void* dest, void const* src, size_t n)