mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 18:47:34 +00:00
Add a fast memcpy() using MMX when we're moving >= 1KB.
This is a nice speedup for WindowServer. I'll eventually have to do this with SSE but the kernel doesn't support SSE yet so this is it for now.
This commit is contained in:
parent
e29060620f
commit
1f159eaab0
6 changed files with 70 additions and 3 deletions
50
AK/StdLibExtras.cpp
Normal file
50
AK/StdLibExtras.cpp
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
#include <AK/StdLibExtras.h>
|
||||||
|
#include <AK/Assertions.h>
|
||||||
|
#include <AK/Types.h>
|
||||||
|
#include <AK/kstdio.h>
|
||||||
|
|
||||||
|
void* mmx_memcpy(void* dest, const void* src, size_t len)
|
||||||
|
{
|
||||||
|
ASSERT(len >= 1024);
|
||||||
|
|
||||||
|
auto* dest_ptr = (byte*)dest;
|
||||||
|
auto* src_ptr = (const byte*)src;
|
||||||
|
|
||||||
|
if ((dword)dest_ptr & 7) {
|
||||||
|
dword prologue = 8 - ((dword)dest_ptr & 7);
|
||||||
|
asm volatile(
|
||||||
|
"rep movsb\n"
|
||||||
|
:: "S"(src_ptr), "D"(dest_ptr), "c"(prologue)
|
||||||
|
: "memory"
|
||||||
|
);
|
||||||
|
len -= prologue;
|
||||||
|
}
|
||||||
|
for (dword i = len / 64; i; --i) {
|
||||||
|
asm volatile(
|
||||||
|
"movq (%0), %%mm0\n"
|
||||||
|
"movq 8(%0), %%mm1\n"
|
||||||
|
"movq 16(%0), %%mm2\n"
|
||||||
|
"movq 24(%0), %%mm3\n"
|
||||||
|
"movq 32(%0), %%mm4\n"
|
||||||
|
"movq 40(%0), %%mm5\n"
|
||||||
|
"movq 48(%0), %%mm6\n"
|
||||||
|
"movq 56(%0), %%mm7\n"
|
||||||
|
"movq %%mm0, (%1)\n"
|
||||||
|
"movq %%mm1, 8(%1)\n"
|
||||||
|
"movq %%mm2, 16(%1)\n"
|
||||||
|
"movq %%mm3, 24(%1)\n"
|
||||||
|
"movq %%mm4, 32(%1)\n"
|
||||||
|
"movq %%mm5, 40(%1)\n"
|
||||||
|
"movq %%mm6, 48(%1)\n"
|
||||||
|
"movq %%mm7, 56(%1)\n"
|
||||||
|
:: "r" (src_ptr), "r" (dest_ptr) : "memory");
|
||||||
|
src_ptr += 64;
|
||||||
|
dest_ptr += 64;
|
||||||
|
}
|
||||||
|
asm volatile("emms":::"memory");
|
||||||
|
// Whatever remains we'll have to memcpy.
|
||||||
|
len %= 64;
|
||||||
|
if (len)
|
||||||
|
memcpy(dest_ptr, src_ptr, len);
|
||||||
|
return dest;
|
||||||
|
}
|
|
@ -9,8 +9,14 @@
|
||||||
|
|
||||||
#include <AK/Types.h>
|
#include <AK/Types.h>
|
||||||
|
|
||||||
|
void* mmx_memcpy(void* to, const void* from, size_t);
|
||||||
|
|
||||||
ALWAYS_INLINE void fast_dword_copy(dword* dest, const dword* src, size_t count)
|
ALWAYS_INLINE void fast_dword_copy(dword* dest, const dword* src, size_t count)
|
||||||
{
|
{
|
||||||
|
if (count >= 256) {
|
||||||
|
mmx_memcpy(dest, src, count * sizeof(count));
|
||||||
|
return;
|
||||||
|
}
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"rep movsl\n"
|
"rep movsl\n"
|
||||||
: "=S"(src), "=D"(dest), "=c"(count)
|
: "=S"(src), "=D"(dest), "=c"(count)
|
||||||
|
|
|
@ -69,7 +69,8 @@ AK_OBJS = \
|
||||||
../AK/String.o \
|
../AK/String.o \
|
||||||
../AK/StringImpl.o \
|
../AK/StringImpl.o \
|
||||||
../AK/StringBuilder.o \
|
../AK/StringBuilder.o \
|
||||||
../AK/FileSystemPath.o
|
../AK/FileSystemPath.o \
|
||||||
|
../AK/StdLibExtras.o
|
||||||
|
|
||||||
OBJS = $(KERNEL_OBJS) $(VFS_OBJS) $(AK_OBJS) $(WINDOWSERVER_OBJS) $(SHAREDGRAPHICS_OBJS)
|
OBJS = $(KERNEL_OBJS) $(VFS_OBJS) $(AK_OBJS) $(WINDOWSERVER_OBJS) $(SHAREDGRAPHICS_OBJS)
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,18 @@
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "Assertions.h"
|
#include "Assertions.h"
|
||||||
#include "kmalloc.h"
|
#include "kmalloc.h"
|
||||||
|
#include <AK/StdLibExtras.h>
|
||||||
#include <AK/Types.h>
|
#include <AK/Types.h>
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
||||||
void memcpy(void *dest_ptr, const void *src_ptr, dword n)
|
void memcpy(void* dest_ptr, const void* src_ptr, dword n)
|
||||||
{
|
{
|
||||||
|
if (n >= 1024) {
|
||||||
|
mmx_memcpy(dest_ptr, src_ptr, n);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
dword dest = (dword)dest_ptr;
|
dword dest = (dword)dest_ptr;
|
||||||
dword src = (dword)src_ptr;
|
dword src = (dword)src_ptr;
|
||||||
// FIXME: Support starting at an unaligned address.
|
// FIXME: Support starting at an unaligned address.
|
||||||
|
|
|
@ -3,6 +3,7 @@ AK_OBJS = \
|
||||||
../AK/String.o \
|
../AK/String.o \
|
||||||
../AK/StringBuilder.o \
|
../AK/StringBuilder.o \
|
||||||
../AK/FileSystemPath.o \
|
../AK/FileSystemPath.o \
|
||||||
|
../AK/StdLibExtras.o \
|
||||||
../AK/kmalloc.o
|
../AK/kmalloc.o
|
||||||
|
|
||||||
SHAREDGRAPHICS_OBJS = \
|
SHAREDGRAPHICS_OBJS = \
|
||||||
|
|
|
@ -102,8 +102,11 @@ int memcmp(const void* v1, const void* v2, size_t n)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void* memcpy(void *dest_ptr, const void *src_ptr, dword n)
|
void* memcpy(void* dest_ptr, const void* src_ptr, dword n)
|
||||||
{
|
{
|
||||||
|
if (n >= 1024)
|
||||||
|
return mmx_memcpy(dest_ptr, src_ptr, n);
|
||||||
|
|
||||||
dword dest = (dword)dest_ptr;
|
dword dest = (dword)dest_ptr;
|
||||||
dword src = (dword)src_ptr;
|
dword src = (dword)src_ptr;
|
||||||
// FIXME: Support starting at an unaligned address.
|
// FIXME: Support starting at an unaligned address.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue