
LibC: Implement posix_memalign(3) and aligned_alloc(3)

Some ports linked against posix_memalign but didn't use it, and others
used it only if it was available. So I decided to implement posix_memalign.

My implementation adds almost no overhead to regular mallocs. However,
if an alignment is specified, it will use the smallest ChunkedBlock size
class for which aligned chunks exist, and simply hand out one of the
aligned chunks. If it cannot use a ChunkedBlock, for size or alignment
reasons, it will use a BigAllocationBlock and return a pointer to the
first aligned address past the start of the block. This implementation
supports alignments up to 32768 bytes, due to the limitations of the
BigAllocationBlock technique.
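
As a usage sketch (not part of the commit, assuming only the standard
signatures): posix_memalign(3) reports failure through its return value
and leaves errno untouched, while aligned_alloc(3) follows the usual
malloc convention of returning a null pointer and setting errno.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        // posix_memalign() returns 0 on success or an error number directly.
        void* p = NULL;
        int rc = posix_memalign(&p, 4096, 1000);
        if (rc != 0) {
            fprintf(stderr, "posix_memalign: %s\n", strerror(rc));
            return 1;
        }

        // aligned_alloc() returns NULL and sets errno on failure.
        void* q = aligned_alloc(4096, 4096);
        if (q == NULL) {
            perror("aligned_alloc");
            free(p);
            return 1;
        }

        free(p);
        free(q);
        return 0;
    }

Both entry points funnel into the same malloc_impl(); the default
16-byte alignment keeps plain malloc() on its fast path.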
Peter Elliott 2022-05-04 23:08:57 -06:00 committed by Andreas Kling
parent 830f5c610d
commit 4e0adb638d
4 changed files with 172 additions and 60 deletions

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
+ * Copyright (c) 2022, Peter Elliott <pelliott@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@@ -153,12 +154,75 @@ static inline BigAllocator (&big_allocators())[1]
return reinterpret_cast<BigAllocator(&)[1]>(g_big_allocators_storage);
}
-static Allocator* allocator_for_size(size_t size, size_t& good_size)
+// --- BEGIN MATH ---
+// This stuff is only used for checking if there exists an aligned block in a
+// chunk. It has no bearing on the rest of the allocator, especially for
+// regular malloc.
+static inline unsigned long modulo(long a, long b)
+{
+    return (b + (a % b)) % b;
+}
+
+struct EuclideanResult {
+    long x;
+    long y;
+    long gcd;
+};
+
+// Returns x, y, gcd.
+static inline EuclideanResult extended_euclid(long a, long b)
+{
+    EuclideanResult old = { 1, 0, a };
+    EuclideanResult current = { 0, 1, b };
+
+    while (current.gcd != 0) {
+        long quotient = old.gcd / current.gcd;
+
+        EuclideanResult next = {
+            old.x - quotient * current.x,
+            old.y - quotient * current.y,
+            old.gcd - quotient * current.gcd,
+        };
+
+        old = current;
+        current = next;
+    }
+
+    return old;
+}
+
+static inline bool block_has_aligned_chunk(long align, long bytes_per_chunk, long chunk_capacity)
+{
+    // Never do math on a normal malloc.
+    if ((size_t)align <= sizeof(ChunkedBlock))
+        return true;
+
+    // Solve the linear congruence n*bytes_per_chunk = -sizeof(ChunkedBlock) (mod align).
+    auto [x, y, gcd] = extended_euclid(bytes_per_chunk % align, align);
+    long constant = modulo(-sizeof(ChunkedBlock), align);
+    if (constant % gcd != 0)
+        // No solution. Chunk size is probably a multiple of align.
+        return false;
+
+    long n = modulo(x * (constant / gcd), align);
+    if (x < 0)
+        n = (n + align / gcd) % align;
+
+    // Don't ask me to prove this.
+    VERIFY(n > 0);
+
+    return n < chunk_capacity;
+}
+// --- END MATH ---
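// [Annotation, not part of the commit: a worked example with hypothetical
// sizes. Let align = 64, sizeof(ChunkedBlock) = 40, bytes_per_chunk = 24.
// We need n with n*24 = -40 = 24 (mod 64). extended_euclid(24, 64) returns
// x = 3, gcd = 8 (since 3*24 - 1*64 = 8), and constant = modulo(-40, 64) = 24
// is divisible by gcd, so n = modulo(3 * (24 / 8), 64) = 9. Check:
// 40 + 9*24 = 256, a multiple of 64, so chunk 9 is aligned whenever
// chunk_capacity > 9.]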
+static Allocator* allocator_for_size(size_t size, size_t& good_size, size_t align = 1)
{
for (size_t i = 0; size_classes[i]; ++i) {
-        if (size <= size_classes[i]) {
+        auto& allocator = allocators()[i];
+        if (size <= size_classes[i] && block_has_aligned_chunk(align, allocator.size, (ChunkedBlock::block_size - sizeof(ChunkedBlock)) / allocator.size)) {
good_size = size_classes[i];
-            return &allocators()[i];
+            return &allocator;
}
}
good_size = PAGE_ROUND_UP(size);
@@ -176,7 +240,7 @@ static BigAllocator* big_allocator_for_size(size_t size)
extern "C" {
-static void* os_alloc(size_t size, char const* name)
+static ErrorOr<void*> os_alloc(size_t size, char const* name)
{
int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_PURGEABLE;
#if ARCH(X86_64)
@@ -185,8 +249,7 @@ static void* os_alloc(size_t size, char const* name)
auto* ptr = serenity_mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, 0, 0, ChunkedBlock::block_size, name);
-    VERIFY(ptr != nullptr);
if (ptr == MAP_FAILED) {
-        errno = ENOMEM;
-        return nullptr;
+        return ENOMEM;
}
return ptr;
}
@@ -197,6 +260,31 @@ static void os_free(void* ptr, size_t size)
assert(rc == 0);
}
+static void* try_allocate_chunk_aligned(size_t align, ChunkedBlock& block)
+{
+    // These loops are guaranteed to run only once for a standard-aligned malloc.
+    for (FreelistEntry** entry = &(block.m_freelist); *entry != nullptr; entry = &((*entry)->next)) {
+        if ((reinterpret_cast<uintptr_t>(*entry) & (align - 1)) == 0) {
+            --block.m_free_chunks;
+            void* ptr = *entry;
+            *entry = (*entry)->next; // Delete the entry.
+            return ptr;
+        }
+    }
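    // [Annotation, not part of the commit: no suitably aligned entry was found
    // on the freelist, so the loop below lazily carves fresh chunks, parking
    // unaligned ones on the freelist where ordinary mallocs can still use them.]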
+    for (; block.m_next_lazy_freelist_index < block.chunk_capacity(); block.m_next_lazy_freelist_index++) {
+        void* ptr = block.m_slot + block.m_next_lazy_freelist_index * block.m_size;
+        if ((reinterpret_cast<uintptr_t>(ptr) & (align - 1)) == 0) {
+            --block.m_free_chunks;
+            block.m_next_lazy_freelist_index++;
+            return ptr;
+        }
+
+        auto* entry = (FreelistEntry*)ptr;
+        entry->next = block.m_freelist;
+        block.m_freelist = entry;
+    }
+
+    return nullptr;
+}
enum class CallerWillInitializeMemory {
No,
Yes,
@@ -208,12 +296,20 @@ enum class CallerWillInitializeMemory {
__thread bool s_allocation_enabled;
#endif
-static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_initialize_memory)
+static ErrorOr<void*> malloc_impl(size_t size, size_t align, CallerWillInitializeMemory caller_will_initialize_memory)
{
#ifndef NO_TLS
VERIFY(s_allocation_enabled);
#endif
+    // Align must be a power of 2.
+    if (popcount(align) != 1)
+        return EINVAL;
+
+    // FIXME: Support larger than 32KiB alignments (if you dare).
+    if (sizeof(BigAllocationBlock) + align >= ChunkedBlock::block_size)
+        return EINVAL;
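    // [Annotation, not part of the commit: this check is the 32 KiB ceiling
    // mentioned in the commit message. The big-allocation path over-allocates
    // by 'align' bytes, and that slack plus the BigAllocationBlock header must
    // still fit inside a single ChunkedBlock::block_size-sized mapping.]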
if (s_log_malloc)
dbgln("LibC: malloc({})", size);
@@ -226,16 +322,15 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
g_malloc_stats.number_of_malloc_calls++;
size_t good_size;
-    auto* allocator = allocator_for_size(size, good_size);
+    auto* allocator = allocator_for_size(size, good_size, align);
PthreadMutexLocker locker(s_malloc_mutex);
if (!allocator) {
-        size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size, ChunkedBlock::block_size);
+        size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size + ((align > 16) ? align : 0), ChunkedBlock::block_size);
if (real_size < size) {
dbgln_if(MALLOC_DEBUG, "LibC: Detected overflow trying to do big allocation of size {} for {}", real_size, size);
-            errno = ENOMEM;
-            return nullptr;
+            return ENOMEM;
}
#ifdef RECYCLE_BIG_ALLOCATIONS
if (auto* allocator = big_allocator_for_size(real_size)) {
@@ -257,27 +352,31 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
new (block) BigAllocationBlock(real_size);
}
-                ue_notify_malloc(&block->m_slot[0], size);
-                return &block->m_slot[0];
+                void* ptr = reinterpret_cast<void*>(round_up_to_power_of_two(reinterpret_cast<uintptr_t>(&block->m_slot[0]), align));
+                ue_notify_malloc(ptr, size);
+                return ptr;
}
}
#endif
-        auto* block = (BigAllocationBlock*)os_alloc(real_size, "malloc: BigAllocationBlock");
-        if (block == nullptr) {
-            dbgln_if(MALLOC_DEBUG, "LibC: Failed to do big allocation of size {} for {}", real_size, size);
-            return nullptr;
-        }
+        auto* block = (BigAllocationBlock*)TRY(os_alloc(real_size, "malloc: BigAllocationBlock"));
g_malloc_stats.number_of_big_allocs++;
new (block) BigAllocationBlock(real_size);
-        ue_notify_malloc(&block->m_slot[0], size);
-        return &block->m_slot[0];
+        void* ptr = reinterpret_cast<void*>(round_up_to_power_of_two(reinterpret_cast<uintptr_t>(&block->m_slot[0]), align));
+        ue_notify_malloc(ptr, size);
+        return ptr;
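        // [Annotation, not part of the commit: because real_size was padded by
        // 'align' bytes above (for align > 16), rounding &m_slot[0] up to a
        // multiple of align cannot run past the end of the allocation.]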
}
ChunkedBlock* block = nullptr;
+    void* ptr = nullptr;
for (auto& current : allocator->usable_blocks) {
if (current.free_chunks()) {
-            block = &current;
-            break;
+            ptr = try_allocate_chunk_aligned(align, current);
+            if (ptr) {
+                block = &current;
+                break;
+            }
}
}
@@ -321,23 +420,16 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
g_malloc_stats.number_of_block_allocs++;
char buffer[64];
snprintf(buffer, sizeof(buffer), "malloc: ChunkedBlock(%zu)", good_size);
-        block = (ChunkedBlock*)os_alloc(ChunkedBlock::block_size, buffer);
-        if (block == nullptr) {
-            return nullptr;
-        }
+        block = (ChunkedBlock*)TRY(os_alloc(ChunkedBlock::block_size, buffer));
new (block) ChunkedBlock(good_size);
allocator->usable_blocks.append(*block);
++allocator->block_count;
}
-    --block->m_free_chunks;
-    void* ptr = block->m_freelist;
-    if (ptr) {
-        block->m_freelist = block->m_freelist->next;
-    } else {
-        ptr = block->m_slot + block->m_next_lazy_freelist_index * block->m_size;
-        block->m_next_lazy_freelist_index++;
+    if (!ptr) {
+        ptr = try_allocate_chunk_aligned(align, *block);
}
VERIFY(ptr);
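    // [Annotation, not part of the commit: the --m_free_chunks bookkeeping that
    // previously lived here now happens inside try_allocate_chunk_aligned().]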
if (block->is_full()) {
g_malloc_stats.number_of_blocks_full++;
@@ -451,14 +543,21 @@ static void free_impl(void* ptr)
void* malloc(size_t size)
{
MemoryAuditingSuppressor suppressor;
-    void* ptr = malloc_impl(size, CallerWillInitializeMemory::No);
+    auto ptr_or_error = malloc_impl(size, 16, CallerWillInitializeMemory::No);
+    if (ptr_or_error.is_error()) {
+        errno = ptr_or_error.error().code();
+        return nullptr;
+    }
+
if (s_profiling)
-        perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast<FlatPtr>(ptr));
-    return ptr;
+        perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast<FlatPtr>(ptr_or_error.value()));
+    return ptr_or_error.value();
}
// This is a Microsoft extension, and is not found on other Unix-like systems.
-// FIXME: Implement aligned_alloc() instead
+// FIXME: Remove this when all patches have been switched to aligned_alloc()
//
// This is used in libc++ to implement C++17 aligned new/delete.
//
@@ -513,10 +612,41 @@ void* calloc(size_t count, size_t size)
return nullptr;
}
size_t new_size = count * size;
-    auto* ptr = malloc_impl(new_size, CallerWillInitializeMemory::Yes);
-    if (ptr)
-        memset(ptr, 0, new_size);
-    return ptr;
+    auto ptr_or_error = malloc_impl(new_size, 16, CallerWillInitializeMemory::Yes);
+    if (ptr_or_error.is_error()) {
+        errno = ptr_or_error.error().code();
+        return nullptr;
+    }
+    memset(ptr_or_error.value(), 0, new_size);
+    return ptr_or_error.value();
}
+// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html
+int posix_memalign(void** memptr, size_t alignment, size_t size)
+{
+    MemoryAuditingSuppressor suppressor;
+    auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No);
+    if (ptr_or_error.is_error())
+        return ptr_or_error.error().code();
+
+    *memptr = ptr_or_error.value();
+    return 0;
+}
+
+void* aligned_alloc(size_t alignment, size_t size)
+{
+    MemoryAuditingSuppressor suppressor;
+    auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No);
+    if (ptr_or_error.is_error()) {
+        errno = ptr_or_error.error().code();
+        return nullptr;
+    }
+    return ptr_or_error.value();
+}
size_t malloc_size(void const* ptr)