mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 22:57:44 +00:00
LibC: Implement a faster memset routine for x86-64 in assembly
This commit addresses the following shortcomings of our current, simple and elegant memset function:

- REP STOSB/STOSQ has considerable startup overhead, so it is impractical to use for smaller sizes.
- Up until very recently, AMD CPUs didn't have support for "Enhanced REP MOVSB/STOSB" (ERMS), so it performed pretty poorly on them.

With this commit applied, I could measure a ~5% decrease in `test-js`'s runtime when I used qemu's TCG backend.

The implementation is based on the following article from Microsoft:
https://msrc-blog.microsoft.com/2021/01/11/building-faster-amd64-memset-routines

Two versions of the routine are implemented: one that uses the ERMS extension mentioned above, and one that performs plain SSE stores. The version appropriate for the CPU is selected at load time using an IFUNC.
This commit is contained in:
parent
484f70fb43
commit
bcf124c07d
5 changed files with 262 additions and 10 deletions
|
@@ -84,7 +84,8 @@ elseif ("${SERENITY_ARCH}" STREQUAL "i686")
|
|||
set(CRTI_SOURCE "arch/i386/crti.S")
|
||||
set(CRTN_SOURCE "arch/i386/crtn.S")
|
||||
elseif ("${SERENITY_ARCH}" STREQUAL "x86_64")
|
||||
set(ASM_SOURCES "arch/x86_64/setjmp.S")
|
||||
set(LIBC_SOURCES ${LIBC_SOURCES} "arch/x86_64/memset.cpp")
|
||||
set(ASM_SOURCES "arch/x86_64/setjmp.S" "arch/x86_64/memset.S")
|
||||
set(ELF_SOURCES ${ELF_SOURCES} ../LibELF/Arch/x86_64/entry.S ../LibELF/Arch/x86_64/plt_trampoline.S)
|
||||
set(CRTI_SOURCE "arch/x86_64/crti.S")
|
||||
set(CRTN_SOURCE "arch/x86_64/crtn.S")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue