From ad9e674fa0007e22498572467ea0926068db5572 Mon Sep 17 00:00:00 2001 From: Daniel Bertalan Date: Wed, 5 Jul 2023 23:58:24 +0200 Subject: [PATCH] LibC+LibELF: Support loading shared libraries compiled with dynamic TLS This is a prerequisite for upstreaming our LLVM patches, as our current hack forcing `-ftls-model=initial-exec` in the Clang driver is not acceptable upstream. Currently, our kernel-managed TLS implementation limits us to only having a single block of storage for all thread-local variables that's initialized at load time. This PR merely implements the dynamic TLS interface (`__tls_get_addr` and TLSDESC) on top of our static TLS infrastructure. The current model's limitations still stand: - a single static TLS block is reserved at load time, so `dlopen()`-ing shared libraries that define thread-local variables might cause us to run out of space. - the initial TLS image is not changeable post-load, so `dlopen()`-ing libraries with non-zero-initialized TLS variables is not supported. The way we repurpose `ti_module` to mean "offset within static TLS block" instead of "module index" is not ABI-compliant. 
--- Userland/DynamicLoader/CMakeLists.txt | 2 + Userland/Libraries/LibC/CMakeLists.txt | 3 +- Userland/Libraries/LibC/sys/internals.h | 10 ++- Userland/Libraries/LibC/tls.cpp | 26 ++++++ Userland/Libraries/LibELF/Arch/aarch64/tls.S | 63 +++++++++++++++ Userland/Libraries/LibELF/DynamicLoader.cpp | 83 +++++++++++++++----- 6 files changed, 166 insertions(+), 21 deletions(-) create mode 100644 Userland/Libraries/LibC/tls.cpp create mode 100644 Userland/Libraries/LibELF/Arch/aarch64/tls.S diff --git a/Userland/DynamicLoader/CMakeLists.txt b/Userland/DynamicLoader/CMakeLists.txt index b61ddad788..e76ff69b68 100644 --- a/Userland/DynamicLoader/CMakeLists.txt +++ b/Userland/DynamicLoader/CMakeLists.txt @@ -13,6 +13,8 @@ file(GLOB LIBC_SOURCES3 "../Libraries/LibC/arch/${ARCH_FOLDER}/*.S") set(ELF_SOURCES ${ELF_SOURCES} "../Libraries/LibELF/Arch/${ARCH_FOLDER}/entry.S" "../Libraries/LibELF/Arch/${ARCH_FOLDER}/plt_trampoline.S") if ("${SERENITY_ARCH}" STREQUAL "x86_64") set(LIBC_SOURCES3 ${LIBC_SOURCES3} "../Libraries/LibC/arch/x86_64/memset.cpp") +elseif ("${SERENITY_ARCH}" STREQUAL "aarch64") + set(ELF_SOURCES ${ELF_SOURCES} "../Libraries/LibELF/Arch/aarch64/tls.S") endif() file(GLOB LIBSYSTEM_SOURCES "../Libraries/LibSystem/*.cpp") diff --git a/Userland/Libraries/LibC/CMakeLists.txt b/Userland/Libraries/LibC/CMakeLists.txt index b42fb30d07..47a48fe261 100644 --- a/Userland/Libraries/LibC/CMakeLists.txt +++ b/Userland/Libraries/LibC/CMakeLists.txt @@ -67,6 +67,7 @@ set(LIBC_SOURCES termios.cpp time.cpp times.cpp + tls.cpp ulimit.cpp unistd.cpp utime.cpp @@ -99,7 +100,7 @@ file(GLOB ELF_SOURCES CONFIGURE_DEPENDS "../LibELF/*.cpp") if ("${SERENITY_ARCH}" STREQUAL "aarch64") set(ASM_SOURCES "arch/aarch64/setjmp.S") - set(ELF_SOURCES ${ELF_SOURCES} ../LibELF/Arch/aarch64/entry.S ../LibELF/Arch/aarch64/plt_trampoline.S) + set(ELF_SOURCES ${ELF_SOURCES} ../LibELF/Arch/aarch64/entry.S ../LibELF/Arch/aarch64/plt_trampoline.S ../LibELF/Arch/aarch64/tls.S) set(CRTI_SOURCE 
"arch/aarch64/crti.S") set(CRTN_SOURCE "arch/aarch64/crtn.S") elseif ("${SERENITY_ARCH}" STREQUAL "x86_64") diff --git a/Userland/Libraries/LibC/sys/internals.h b/Userland/Libraries/LibC/sys/internals.h index 2b1b9a795d..f4624f3d1a 100644 --- a/Userland/Libraries/LibC/sys/internals.h +++ b/Userland/Libraries/LibC/sys/internals.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, the SerenityOS developers. + * Copyright (c) 2020-2023, the SerenityOS developers. * * SPDX-License-Identifier: BSD-2-Clause */ @@ -7,6 +7,7 @@ #pragma once #include +#include __BEGIN_DECLS @@ -28,4 +29,11 @@ __attribute__((noreturn)) void __cxa_pure_virtual(void) __attribute__((weak)); __attribute__((noreturn)) void __stack_chk_fail(void); __attribute__((noreturn)) void __stack_chk_fail_local(void); +struct __tls_index { + size_t ti_module; + size_t ti_offset; +}; + +void* __tls_get_addr(__tls_index*); + __END_DECLS diff --git a/Userland/Libraries/LibC/tls.cpp b/Userland/Libraries/LibC/tls.cpp new file mode 100644 index 0000000000..8eae23dc62 --- /dev/null +++ b/Userland/Libraries/LibC/tls.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2023, Daniel Bertalan + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +extern "C" { + +// This function is called to compute the address of a thread-local variable +// which might not be stored in the static TLS block (local-dynamic and +// global-dynamic models). Compilers default to this when creating shared +// libraries, as they may be loaded after program startup by `dlopen()`. +// +// We currently only support a static TLS block, so we take a shortcut in the +// implementation of this interface: instead of storing the module ID in +// ti_module, we store the module's TLS block offset. This avoids the need to +// have a per-thread module ID -> TLS block address. This will have to be +// changed if we support dynamically allocated TLS blocks. 
+void* __tls_get_addr(__tls_index* index) +{ + return reinterpret_cast(reinterpret_cast(__builtin_thread_pointer()) + index->ti_module + index->ti_offset); +} +} diff --git a/Userland/Libraries/LibELF/Arch/aarch64/tls.S b/Userland/Libraries/LibELF/Arch/aarch64/tls.S new file mode 100644 index 0000000000..f33e23add4 --- /dev/null +++ b/Userland/Libraries/LibELF/Arch/aarch64/tls.S @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023, Daniel Bertalan + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +// This file implements the runtime components of the AArch64 TLSDESC ABI, +// which is used when accessing thread-local variables which might not be +// stored in the static TLS block (global-dynamic and local-dynamic access +// models). Compilers default to this when creating shared libraries, as they +// may be loaded after program startup by `dlopen()`. +// +// Each referenced thread-local symbol is associated with a descriptor: +// +// struct TlsDescriptor { +// size_t (*resolver)(TlsDescriptor*); +// union { +// size_t tpoff; // for static TLS +// struct { +// size_t module_id; +// size_t module_offset; +// } *dynamic; // for dynamic TLS, not yet implemented +// }; +// }; +// +// The resolver takes a pointer to the descriptor as an argument and returns +// the symbol's offset to the thread pointer (tpidr_el0). The second field of +// the descriptor is an implementation-defined value which the resolver uses to +// identify the symbol. +// +// Thus, the address of a thread-local variable is retrieved as follows: +// +// &var = thread_pointer + descriptor.resolver(&descriptor); +// +// The two essential types of resolver functions are: +// +// - `__tlsdesc_static`: If the variable is located in the static TLS block, +// its thread pointer offset is a load-time constant, which can be stored in +// the descriptor. This function simply returns that. +// +// - `__tlsdesc_dynamic`: Looks up a variable by its module ID and module offset. 
+// This is used if the TLS block is allocated separately, so it might have a +// different thread pointer offset for each thread. This works similarly to +// the traditional TLS ABI's __tls_get_addr function. Not yet implemented in +// SerenityOS. +// +// The TLSDESC format strives to make the code sequence for thread-local +// variable access as short as possible, hence the resolver functions follow a +// special calling convention: they must not clobber any registers other than +// x0, which carries both the argument and the return value. To ensure +// that even the usually volatile registers are saved off, we need to implement +// the resolvers in assembly. + +// size_t __tlsdesc_static(TlsDescriptor* desc) +// { +// return desc->tpoff; +// } +.p2align 4 +.globl __tlsdesc_static +.hidden __tlsdesc_static +.type __tlsdesc_static,@function +__tlsdesc_static: + ldr x0, [x0, #8] + ret diff --git a/Userland/Libraries/LibELF/DynamicLoader.cpp b/Userland/Libraries/LibELF/DynamicLoader.cpp index cb9a9fea79..b1a8ce808d 100644 --- a/Userland/Libraries/LibELF/DynamicLoader.cpp +++ b/Userland/Libraries/LibELF/DynamicLoader.cpp @@ -2,7 +2,7 @@ * Copyright (c) 2019-2020, Andrew Kaster * Copyright (c) 2020, Itamar S. 
* Copyright (c) 2021, Andreas Kling - * Copyright (c) 2022, Daniel Bertalan + * Copyright (c) 2022-2023, Daniel Bertalan * * SPDX-License-Identifier: BSD-2-Clause */ @@ -35,6 +35,13 @@ static void* mmap_with_name(void* addr, size_t length, int prot, int flags, int # define MAP_RANDOMIZED 0 #endif +#if ARCH(AARCH64) +# define HAS_TLSDESC_SUPPORT +extern "C" { +void* __tlsdesc_static(void*); +} +#endif + namespace ELF { Result, DlErrorMessage> DynamicLoader::try_create(int fd, DeprecatedString filepath) @@ -527,6 +534,23 @@ DynamicLoader::RelocationResult DynamicLoader::do_direct_relocation(DynamicObjec return VirtualAddress { reinterpret_cast(address.get())() }; }; + struct ResolvedTLSSymbol { + DynamicObject const& dynamic_object; + FlatPtr value; + }; + + auto resolve_tls_symbol = [](DynamicObject::Relocation const& relocation) -> Optional { + if (relocation.symbol_index() == 0) + return ResolvedTLSSymbol { relocation.dynamic_object(), 0 }; + + auto res = lookup_symbol(relocation.symbol()); + if (!res.has_value()) + return {}; + VERIFY(relocation.symbol().type() != STT_GNU_IFUNC); + VERIFY(res.value().dynamic_object != nullptr); + return ResolvedTLSSymbol { *res.value().dynamic_object, res.value().value }; + }; + switch (relocation.type()) { case R_X86_64_NONE: @@ -601,30 +625,51 @@ DynamicLoader::RelocationResult DynamicLoader::do_direct_relocation(DynamicObjec } case R_AARCH64_TLS_TPREL: case R_X86_64_TPOFF64: { - auto symbol = relocation.symbol(); - FlatPtr symbol_value; - DynamicObject const* dynamic_object_of_symbol; - if (relocation.symbol_index() != 0) { - auto res = lookup_symbol(symbol); - if (!res.has_value()) - break; - VERIFY(symbol.type() != STT_GNU_IFUNC); - symbol_value = res.value().value; - dynamic_object_of_symbol = res.value().dynamic_object; - } else { - symbol_value = 0; - dynamic_object_of_symbol = &relocation.dynamic_object(); - } - VERIFY(dynamic_object_of_symbol); - size_t addend = relocation.addend_used() ? 
relocation.addend() : *patch_ptr; + auto maybe_resolution = resolve_tls_symbol(relocation); + if (!maybe_resolution.has_value()) + break; + auto [dynamic_object_of_symbol, symbol_value] = maybe_resolution.value(); - *patch_ptr = addend + dynamic_object_of_symbol->tls_offset().value() + symbol_value; + size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr; + *patch_ptr = addend + dynamic_object_of_symbol.tls_offset().value() + symbol_value; // At offset 0 there's the thread's ThreadSpecificData structure, we don't want to collide with it. VERIFY(static_cast(*patch_ptr) < 0); - break; } + case R_X86_64_DTPMOD64: { + auto maybe_resolution = resolve_tls_symbol(relocation); + if (!maybe_resolution.has_value()) + break; + + // We repurpose the module index to store the TLS block's TP offset. This is fine + // because we currently only support a single static TLS block. + *patch_ptr = maybe_resolution->dynamic_object.tls_offset().value(); + break; + } + case R_X86_64_DTPOFF64: { + auto maybe_resolution = resolve_tls_symbol(relocation); + if (!maybe_resolution.has_value()) + break; + + size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr; + *patch_ptr = addend + maybe_resolution->value; + break; + } +#ifdef HAS_TLSDESC_SUPPORT + case R_AARCH64_TLSDESC: { + auto maybe_resolution = resolve_tls_symbol(relocation); + if (!maybe_resolution.has_value()) + break; + auto [dynamic_object_of_symbol, symbol_value] = maybe_resolution.value(); + + size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr; + + patch_ptr[0] = (FlatPtr)__tlsdesc_static; + patch_ptr[1] = addend + dynamic_object_of_symbol.tls_offset().value() + symbol_value; + break; + } +#endif case R_AARCH64_IRELATIVE: case R_X86_64_IRELATIVE: { if (should_call_ifunc_resolver == ShouldCallIfuncResolver::No)