1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 15:47:44 +00:00

LibELF+LibC: Split ELFDynamicObject into a Loader + Object

Separate some responsibilities:

ELFDynamicLoader is responsible for loading ELF binaries from disk,
performing relocations, calling init functions, and eventually calling
finalizer functions.

ELFDynamicObject is a helper class to parse the .dynamic section of an
ELF binary, or the table of Elf32_Dyn entries at the _DYNAMIC symbol.
ELFDynamicObject now owns the helper classes for Relocations, Symbols,
Sections and the like that ELFDynamicLoader will use to perform
relocations and symbol lookup.

Because these new helpers are constructed from offsets into the .dynamic
section within the loaded .data section of the binary, we don't need the
ELFImage for nearly as much of the loading process as we did before.
Therefore we can remove most of the extra DynamicXXX classes and just
keep the one that lets us find the location of _DYNAMIC in the new ELF.

And finally, since we changed the name of the class that dlopen/dlsym
care about, we need to compile/link and use the new ELFDynamicLoader
class in LibC.
This commit is contained in:
Andrew Kaster 2020-01-03 23:31:51 -05:00 committed by Andreas Kling
parent 85b95f472d
commit 767f4c7421
8 changed files with 788 additions and 634 deletions

View file

@ -58,6 +58,7 @@ LIBC_OBJS = \
ELF_OBJS = \ ELF_OBJS = \
../LibELF/ELFDynamicObject.o \ ../LibELF/ELFDynamicObject.o \
../LibELF/ELFDynamicLoader.o \
../LibELF/ELFImage.o ../LibELF/ELFImage.o
OBJS = $(AK_OBJS) $(LIBC_OBJS) $(ELF_OBJS) OBJS = $(AK_OBJS) $(LIBC_OBJS) $(ELF_OBJS)

View file

@ -12,12 +12,12 @@
#include <AK/ScopeGuard.h> #include <AK/ScopeGuard.h>
#include <AK/String.h> #include <AK/String.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
#include <LibELF/ELFDynamicObject.h> #include <LibELF/ELFDynamicLoader.h>
// NOTE: The string here should never include a trailing newline (according to POSIX) // NOTE: The string here should never include a trailing newline (according to POSIX)
String g_dlerror_msg; String g_dlerror_msg;
HashMap<String, RefPtr<ELFDynamicObject>> g_elf_objects; HashMap<String, RefPtr<ELFDynamicLoader>> g_elf_objects;
extern "C" { extern "C" {
@ -68,19 +68,19 @@ void* dlopen(const char* filename, int flags)
return nullptr; return nullptr;
} }
auto image = ELFDynamicObject::construct(filename, fd, file_stats.st_size); auto loader = ELFDynamicLoader::construct(filename, fd, file_stats.st_size);
if (!image->is_valid()) { if (!loader->is_valid()) {
g_dlerror_msg = String::format("%s is not a valid ELF dynamic shared object!", filename); g_dlerror_msg = String::format("%s is not a valid ELF dynamic shared object!", filename);
return nullptr; return nullptr;
} }
if (!image->load(flags)) { if (!loader->load_from_image(flags)) {
g_dlerror_msg = String::format("Failed to load ELF object %s", filename); g_dlerror_msg = String::format("Failed to load ELF object %s", filename);
return nullptr; return nullptr;
} }
g_elf_objects.set(file_path.basename(), move(image)); g_elf_objects.set(file_path.basename(), move(loader));
g_dlerror_msg = "Successfully loaded ELF object."; g_dlerror_msg = "Successfully loaded ELF object.";
// we have one refcount already // we have one refcount already
@ -91,7 +91,7 @@ void* dlsym(void* handle, const char* symbol_name)
{ {
// FIXME: When called with a NULL handle we're supposed to search every dso in the process... that'll get expensive // FIXME: When called with a NULL handle we're supposed to search every dso in the process... that'll get expensive
ASSERT(handle); ASSERT(handle);
auto* dso = reinterpret_cast<ELFDynamicObject*>(handle); auto* dso = reinterpret_cast<ELFDynamicLoader*>(handle);
void* symbol = dso->symbol_for_name(symbol_name); void* symbol = dso->symbol_for_name(symbol_name);
if (!symbol) { if (!symbol) {
g_dlerror_msg = "Symbol not found"; g_dlerror_msg = "Symbol not found";

View file

@ -0,0 +1,343 @@
#include <AK/StringBuilder.h>
#include <LibELF/ELFDynamicLoader.h>
#include <assert.h>
#include <dlfcn.h>
#include <mman.h>
#include <stdio.h>
#include <stdlib.h>
// Compile-time switch: while defined, the loader prints progress messages through
// dbgprintf() in the #ifdef DYNAMIC_LOAD_DEBUG sections of this file.
#define DYNAMIC_LOAD_DEBUG
// Uncomment to additionally enable the per-relocation VERBOSE() output.
//#define DYNAMIC_LOAD_VERBOSE
// VERBOSE() forwards to dbgprintf() when DYNAMIC_LOAD_VERBOSE is defined; otherwise it
// expands to an empty statement, so its arguments are never evaluated.
#ifdef DYNAMIC_LOAD_VERBOSE
# define VERBOSE(fmt, ...) dbgprintf(fmt, ##__VA_ARGS__)
#else
# define VERBOSE(fmt, ...) \
do { \
} while (0)
#endif
// When true, do_relocations() resolves every PLT entry eagerly, regardless of what
// the loaded object's must_bind_now() reports.
static bool s_always_bind_now = false;
// Factory for loaders: allocates an ELFDynamicLoader for the given file and adopts the
// fresh object into a NonnullRefPtr.
NonnullRefPtr<ELFDynamicLoader> ELFDynamicLoader::construct(const char* filename, int fd, size_t size)
{
    auto* loader = new ELFDynamicLoader(filename, fd, size);
    return adopt(*loader);
}
// Maps the whole file read-only (named "ELF_DYN: <filename>") so the image can be
// inspected later. A failed mapping is recorded by clearing m_valid.
ELFDynamicLoader::ELFDynamicLoader(const char* filename, int fd, size_t size)
    : m_filename(filename)
    , m_file_size(size)
    , m_image_fd(fd)
{
    String mapping_name = String::format("ELF_DYN: %s", m_filename.characters());
    m_file_mapping = mmap_with_name(nullptr, size, PROT_READ, MAP_PRIVATE, m_image_fd, 0, mapping_name.characters());
    if (m_file_mapping == MAP_FAILED)
        m_valid = false;
}
// Releases the read-only file mapping created by the constructor, if it succeeded.
ELFDynamicLoader::~ELFDynamicLoader()
{
    if (m_file_mapping == MAP_FAILED)
        return;
    munmap(m_file_mapping, m_file_size);
}
// Looks `name` up in the dynamic object's hash section.
// Returns the symbol's value offset from the object's base address, or nullptr when
// the lookup yields an undefined symbol.
void* ELFDynamicLoader::symbol_for_name(const char* name)
{
    auto found = m_dynamic_object->hash_section().lookup_symbol(name);
    if (!found.is_undefined())
        return m_dynamic_object->base_address().offset(found.value()).as_ptr();
    return nullptr;
}
bool ELFDynamicLoader::load_from_image(unsigned flags)
{
ELFImage elf_image((u8*)m_file_mapping);
m_valid = elf_image.is_valid() && elf_image.is_dynamic();
if (!m_valid) {
return false;
}
const ELFImage::DynamicSection probably_dynamic_section = elf_image.dynamic_section();
if (StringView(".dynamic") != probably_dynamic_section.name() || probably_dynamic_section.type() != SHT_DYNAMIC) {
m_valid = false;
return false;
}
#ifdef DYNAMIC_LOAD_VERBOSE
m_image->dump();
#endif
load_program_headers(elf_image);
const ELFImage::DynamicSection image_dynamic_section = elf_image.dynamic_section();
m_dynamic_object = AK::make<ELFDynamicObject>(m_text_segment_load_address, image_dynamic_section.offset());
return load_stage_2(flags);
}
// Stage 2 of loading: performs relocations, installs the PLT trampoline and runs the
// object's init functions. Expects the program headers to be loaded and
// m_dynamic_object to be set up already.
//
// Only RTLD_GLOBAL | RTLD_LAZY loads are supported (asserted).
// Returns false if changing the protection of the text segment fails, true otherwise.
bool ELFDynamicLoader::load_stage_2(unsigned flags)
{
    ASSERT(flags & RTLD_GLOBAL);
    ASSERT(flags & RTLD_LAZY);

#ifdef DYNAMIC_LOAD_DEBUG
    m_dynamic_object->dump();
#endif

    // Text relocations patch the text segment itself, so it must be writable meanwhile.
    if (m_dynamic_object->has_text_relocations()) {
        ASSERT(m_text_segment_load_address.get() != 0);
        const int rc = mprotect(m_text_segment_load_address.as_ptr(), m_text_segment_size, PROT_READ | PROT_WRITE);
        if (rc < 0) {
            perror("mprotect"); // FIXME: dlerror?
            return false;
        }
    }

    do_relocations();
    setup_plt_trampoline();

    // Clean up our setting of .text to PROT_READ | PROT_WRITE
    if (m_dynamic_object->has_text_relocations()) {
        const int rc = mprotect(m_text_segment_load_address.as_ptr(), m_text_segment_size, PROT_READ | PROT_EXEC);
        if (rc < 0) {
            perror("mprotect"); // FIXME: dlerror?
            return false;
        }
    }

    call_object_init_functions();

#ifdef DYNAMIC_LOAD_DEBUG
    dbgprintf("Loaded %s\n", m_filename.characters());
#endif
    return true;
}
void ELFDynamicLoader::load_program_headers(const ELFImage& elf_image)
{
size_t total_required_allocation_size = 0; // NOTE: If we don't have any TEXTREL, we can keep RO data RO, which would be nice
Vector<ProgramHeaderRegion> program_headers;
ProgramHeaderRegion* text_region_ptr = nullptr;
ProgramHeaderRegion* data_region_ptr = nullptr;
ProgramHeaderRegion* tls_region_ptr = nullptr;
elf_image.for_each_program_header([&](const ELFImage::ProgramHeader& program_header) {
ProgramHeaderRegion new_region;
new_region.set_program_header(program_header.raw_header());
if (new_region.is_load())
total_required_allocation_size += new_region.required_load_size();
program_headers.append(move(new_region));
auto& region = program_headers.last();
if (region.is_tls_template())
tls_region_ptr = &region;
else if (region.is_load()) {
if (region.is_executable())
text_region_ptr = &region;
else
data_region_ptr = &region;
}
});
ASSERT(text_region_ptr && data_region_ptr);
// Process regions in order: .text, .data, .tls
auto* region = text_region_ptr;
void* text_segment_begin = mmap_with_name(nullptr, region->required_load_size(), region->mmap_prot(), MAP_PRIVATE, m_image_fd, region->offset(), String::format(".text: %s", m_filename.characters()).characters());
if (MAP_FAILED == text_segment_begin) {
ASSERT_NOT_REACHED();
}
m_text_segment_size = region->required_load_size();
m_text_segment_load_address = VirtualAddress { (u32)text_segment_begin };
region = data_region_ptr;
void* data_segment_begin = mmap_with_name((u8*)text_segment_begin + m_text_segment_size, region->required_load_size(), region->mmap_prot(), MAP_ANONYMOUS | MAP_PRIVATE, 0, 0, String::format(".data: %s", m_filename.characters()).characters());
if (MAP_FAILED == data_segment_begin) {
ASSERT_NOT_REACHED();
}
VirtualAddress data_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin);
memcpy(data_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image());
// FIXME: Do some kind of 'allocate TLS section' or some such from a per-application pool
if (tls_region_ptr) {
region = tls_region_ptr;
// FIXME: This can't be right either. TLS needs some real work i'd say :)
m_tls_segment_address = tls_region_ptr->desired_load_address();
VirtualAddress tls_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin);
memcpy(tls_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image());
}
}
// Applies all relocations of the loaded object.
// First walks the main relocation section and patches the 32-bit word at
// (load base + relocation offset) according to the i386 relocation type; an unknown
// type is reported and then asserts. Afterwards walks the PLT relocation section and
// either resolves each entry immediately via patch_plt_entry() or, for lazy binding,
// just rebases the slot by the load base address.
void ELFDynamicLoader::do_relocations()
{
u32 load_base_address = m_dynamic_object->base_address().get();
// FIXME: We should really bail on undefined symbols here.
auto main_relocation_section = m_dynamic_object->relocation_section();
main_relocation_section.for_each_relocation([&](const ELFDynamicObject::Relocation& relocation) {
VERBOSE("====== RELOCATION %d: offset 0x%08X, type %d, symidx %08X\n", relocation.offset_in_section() / main_relocation_section.entry_size(), relocation.offset(), relocation.type(), relocation.symbol_index());
// Location in the loaded image that this relocation patches.
u32* patch_ptr = (u32*)(load_base_address + relocation.offset());
switch (relocation.type()) {
case R_386_NONE:
// Apparently most loaders will just skip these?
// Seems if the 'link editor' generates one something is funky with your code
VERBOSE("None relocation. No symbol, no nothin.\n");
break;
case R_386_32: {
// Adds the symbol's loaded address to the value already stored at the patch location.
auto symbol = relocation.symbol();
VERBOSE("Absolute relocation: name: '%s', value: %p\n", symbol.name(), symbol.value());
u32 symbol_address = symbol.value() + load_base_address;
*patch_ptr += symbol_address;
VERBOSE(" Symbol address: %p\n", *patch_ptr);
break;
}
case R_386_PC32: {
// Adds (symbol value - relocation offset) to the stored value; both operands are
// image-relative, so the load base is not involved here.
auto symbol = relocation.symbol();
VERBOSE("PC-relative relocation: '%s', value: %p\n", symbol.name(), symbol.value());
u32 relative_offset = (symbol.value() - relocation.offset());
*patch_ptr += relative_offset;
VERBOSE(" Symbol address: %p\n", *patch_ptr);
break;
}
case R_386_GLOB_DAT: {
// Overwrites (rather than adds to) the slot with the symbol's loaded address.
auto symbol = relocation.symbol();
VERBOSE("Global data relocation: '%s', value: %p\n", symbol.name(), symbol.value());
u32 symbol_location = load_base_address + symbol.value();
*patch_ptr = symbol_location;
VERBOSE(" Symbol address: %p\n", *patch_ptr);
break;
}
case R_386_RELATIVE: {
// FIXME: According to the spec, R_386_relative ones must be done first.
// We could explicitly do them first using m_number_of_relocations from DT_RELCOUNT
// However, our compiler is nice enough to put them at the front of the relocations for us :)
VERBOSE("Load address relocation at offset %X\n", relocation.offset());
VERBOSE(" patch ptr == %p, adding load base address (%p) to it and storing %p\n", *patch_ptr, load_base_address, *patch_ptr + load_base_address);
*patch_ptr += load_base_address; // + addend for RelA (addend for Rel is stored at addr)
break;
}
case R_386_TLS_TPOFF: {
VERBOSE("Relocation type: R_386_TLS_TPOFF at offset %X\n", relocation.offset());
// FIXME: this can't be right? I have no idea what "negative offset into TLS storage" means...
// FIXME: Check m_has_static_tls and do something different for dynamic TLS
*patch_ptr = relocation.offset() - (u32)m_tls_segment_address.as_ptr() - *patch_ptr;
break;
}
default:
// Raise the alarm! Someone needs to implement this relocation type
dbgprintf("Found a new exciting relocation type %d\n", relocation.type());
printf("ELFDynamicLoader: Found unknown relocation type %d\n", relocation.type());
ASSERT_NOT_REACHED();
break;
}
return IterationDecision::Continue;
});
// Handle PLT Global offset table relocations.
m_dynamic_object->plt_relocation_section().for_each_relocation([&](const ELFDynamicObject::Relocation& relocation) {
// FIXME: Or BIND_NOW flag passed in?
if (m_dynamic_object->must_bind_now() || s_always_bind_now) {
// Eagerly BIND_NOW the PLT entries, doing all the symbol looking goodness
// The patch method returns the address for the LAZY fixup path, but we don't need it here
(void)patch_plt_entry(relocation.offset_in_section());
} else {
// LAZY-ily bind the PLT slots by just adding the base address to the offsets stored there
// This avoids doing symbol lookup, which might be expensive
ASSERT(relocation.type() == R_386_JMP_SLOT);
u8* relocation_address = relocation.address().as_ptr();
*(u32*)relocation_address += load_base_address;
}
return IterationDecision::Continue;
});
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Done relocating!\n");
#endif
}
// Defined in <arch>/plt_trampoline.S
extern "C" void _plt_trampoline(void) __attribute__((visibility("hidden")));
void ELFDynamicLoader::setup_plt_trampoline()
{
VirtualAddress got_address = m_dynamic_object->plt_got_base_address();
u32* got_u32_ptr = (u32*)got_address.as_ptr();
got_u32_ptr[1] = (u32)this;
got_u32_ptr[2] = (u32)&_plt_trampoline;
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Set GOT PLT entries at %p: [0] = %p [1] = %p, [2] = %p\n", got_u32_ptr, got_u32_ptr[0], got_u32_ptr[1], got_u32_ptr[2]);
#endif
}
// Called from our ASM routine _plt_trampoline
// Forwards to the loader's patch_plt_entry() and returns the address it resolved.
extern "C" Elf32_Addr _fixup_plt_entry(ELFDynamicLoader* object, u32 relocation_offset)
{
    Elf32_Addr resolved_address = object->patch_plt_entry(relocation_offset);
    return resolved_address;
}
// Resolves one PLT entry. `relocation_offset` is a byte offset into the PLT relocation
// table; the entry there must be an R_386_JMP_SLOT relocation (asserted).
// Stores the symbol's address into the slot the relocation refers to and returns it.
Elf32_Addr ELFDynamicLoader::patch_plt_entry(u32 relocation_offset)
{
    auto plt_relocation = m_dynamic_object->plt_relocation_section().relocation_at_offset(relocation_offset);
    ASSERT(plt_relocation.type() == R_386_JMP_SLOT);

    auto target_symbol = plt_relocation.symbol();
    u32* slot = (u32*)plt_relocation.address().as_ptr();
    u32 target_address = target_symbol.address().get();

    VERBOSE("ELFDynamicLoader: Jump slot relocation: putting %s (%p) into PLT at %p\n", target_symbol.name(), target_address, slot);

    *slot = target_address;
    return target_address;
}
// Runs the object's initializers: first the single DT_INIT function, then every entry
// of the DT_INIT_ARRAY in order. Array entries that are 0 or -1 are skipped.
void ELFDynamicLoader::call_object_init_functions()
{
    typedef void (*InitFunc)();
    auto init_function = (InitFunc)(m_dynamic_object->init_section().address().as_ptr());

#ifdef DYNAMIC_LOAD_DEBUG
    dbgprintf("Calling DT_INIT at %p\n", init_function);
#endif
    (init_function)();

    auto init_array_section = m_dynamic_object->init_array_section();

    InitFunc* init_begin = (InitFunc*)(init_array_section.address().as_ptr());
    InitFunc* init_end = init_begin + init_array_section.entry_count();
    // The increment lives in the for-header so that skipped entries still advance the
    // cursor; a `continue` in the previous while-loop bypassed it and spun forever.
    for (InitFunc* entry = init_begin; entry != init_end; ++entry) {
        // Android sources claim that these can be -1, to be ignored.
        // 0 definitely shows up. Apparently 0/-1 are valid? Confusing.
        if (!*entry || ((i32)*entry == -1))
            continue;
#ifdef DYNAMIC_LOAD_DEBUG
        dbgprintf("Calling DT_INITARRAY entry at %p\n", *entry);
#endif
        (*entry)();
    }
}
// Translates the segment's PF_X / PF_R / PF_W flags into the matching
// PROT_EXEC / PROT_READ / PROT_WRITE bits for mmap()/mprotect().
u32 ELFDynamicLoader::ProgramHeaderRegion::mmap_prot() const
{
    int prot = 0;
    if (is_executable())
        prot |= PROT_EXEC;
    if (is_readable())
        prot |= PROT_READ;
    if (is_writable())
        prot |= PROT_WRITE;
    return prot;
}

View file

@ -0,0 +1,89 @@
#pragma once
#include <LibELF/ELFDynamicObject.h>
#include <LibELF/ELFImage.h>
#include <LibELF/exec_elf.h>
#include <mman.h>
#include <AK/OwnPtr.h>
#include <AK/RefCounted.h>
#include <AK/String.h>
#define ALIGN_ROUND_UP(x, align) ((((size_t)(x)) + align - 1) & (~(align - 1)))
// Loads an ELF shared object from a file descriptor into the current process:
// maps its segments, performs relocations, installs the lazy-binding PLT trampoline
// and runs its init functions. Parsing of the .dynamic section is delegated to the
// owned ELFDynamicObject.
class ELFDynamicLoader : public RefCounted<ELFDynamicLoader> {
public:
    // Creates a loader for `filename`, already opened as `fd`, whose size is `file_size` bytes.
    static NonnullRefPtr<ELFDynamicLoader> construct(const char* filename, int fd, size_t file_size);

    ~ELFDynamicLoader();

    // False once mapping or validating the file has failed.
    bool is_valid() const { return m_valid; }

    // Load a full ELF image from file into the current process and create an ELFDynamicObject
    // from the SHT_DYNAMIC in the file.
    bool load_from_image(unsigned flags);

    // Stage 2 of loading: relocations and init functions
    // Assumes that the program headers have been loaded and that m_dynamic_object is initialized
    // Splitting loading like this allows us to use the same code to relocate a main executable as an elf binary
    bool load_stage_2(unsigned flags);

    // Intended for use by dlsym or other internal methods
    void* symbol_for_name(const char*);

    void dump();

    // Will be called from _fixup_plt_entry, as part of the PLT trampoline
    Elf32_Addr patch_plt_entry(u32 relocation_offset);

private:
    // Thin wrapper around a copied Elf32_Phdr with named accessors.
    class ProgramHeaderRegion {
    public:
        void set_program_header(const Elf32_Phdr& header) { m_program_header = header; }

        // Information from ELF Program header
        u32 type() const { return m_program_header.p_type; }
        u32 flags() const { return m_program_header.p_flags; }
        u32 offset() const { return m_program_header.p_offset; }
        VirtualAddress desired_load_address() const { return VirtualAddress(m_program_header.p_vaddr); }
        u32 size_in_memory() const { return m_program_header.p_memsz; }
        u32 size_in_image() const { return m_program_header.p_filesz; }
        u32 alignment() const { return m_program_header.p_align; }
        u32 mmap_prot() const;
        bool is_readable() const { return flags() & PF_R; }
        bool is_writable() const { return flags() & PF_W; }
        bool is_executable() const { return flags() & PF_X; }
        bool is_tls_template() const { return type() == PT_TLS; }
        bool is_load() const { return type() == PT_LOAD; }
        bool is_dynamic() const { return type() == PT_DYNAMIC; }

        // In-memory size of the segment rounded up to its alignment.
        u32 required_load_size() const
        {
            // p_align of 0 or 1 means "no alignment constraint"; ALIGN_ROUND_UP would
            // mask the whole size away for an alignment of 0.
            if (m_program_header.p_align <= 1)
                return m_program_header.p_memsz;
            return ALIGN_ROUND_UP(m_program_header.p_memsz, m_program_header.p_align);
        }

    private:
        Elf32_Phdr m_program_header; // Explictly a copy of the PHDR in the image
    };

    explicit ELFDynamicLoader(const char* filename, int fd, size_t file_size);
    explicit ELFDynamicLoader(Elf32_Dyn* dynamic_location, Elf32_Addr load_address);

    // Stage 1
    void load_program_headers(const ELFImage& elf_image);

    // Stage 2
    void do_relocations();
    void setup_plt_trampoline();
    void call_object_init_functions();

    String m_filename;
    size_t m_file_size { 0 };
    int m_image_fd { -1 };
    void* m_file_mapping { nullptr };
    bool m_valid { true };

    OwnPtr<ELFDynamicObject> m_dynamic_object;

    VirtualAddress m_text_segment_load_address;
    size_t m_text_segment_size { 0 }; // Set by load_program_headers(); 0 until then
    VirtualAddress m_tls_segment_address;
};

View file

@ -1,146 +1,44 @@
#include <AK/StringBuilder.h>
#include <LibELF/ELFDynamicObject.h> #include <LibELF/ELFDynamicObject.h>
#include <LibELF/exec_elf.h>
#include <AK/StringBuilder.h>
#include <assert.h> #include <assert.h>
#include <mman.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#define DYNAMIC_LOAD_DEBUG static const char* name_for_dtag(Elf32_Sword d_tag);
//#define DYNAMIC_LOAD_VERBOSE
#ifdef DYNAMIC_LOAD_VERBOSE ELFDynamicObject::ELFDynamicObject(VirtualAddress base_address, u32 dynamic_offset)
# define VERBOSE(fmt, ...) dbgprintf(fmt, ##__VA_ARGS__) : m_base_address(base_address)
#else , m_dynamic_offset(dynamic_offset)
# define VERBOSE(fmt, ...) \
do { \
} while (0)
#endif
static bool s_always_bind_now = false;
static const char* name_for_dtag(Elf32_Sword tag);
// SYSV ELF hash algorithm
// Note that the GNU HASH algorithm has less collisions
static uint32_t calculate_elf_hash(const char* name)
{ {
uint32_t hash = 0; parse();
uint32_t top_nibble_of_hash = 0;
while (*name != '\0') {
hash = hash << 4;
hash += *name;
name++;
top_nibble_of_hash = hash & 0xF0000000U;
if (top_nibble_of_hash != 0)
hash ^= top_nibble_of_hash >> 24;
hash &= ~top_nibble_of_hash;
}
return hash;
}
NonnullRefPtr<ELFDynamicObject> ELFDynamicObject::construct(const char* filename, int fd, size_t size)
{
return adopt(*new ELFDynamicObject(filename, fd, size));
}
ELFDynamicObject::ELFDynamicObject(const char* filename, int fd, size_t size)
: m_filename(filename)
, m_file_size(size)
, m_image_fd(fd)
{
String file_mmap_name = String::format("ELF_DYN: %s", m_filename.characters());
m_file_mapping = mmap_with_name(nullptr, size, PROT_READ, MAP_PRIVATE, m_image_fd, 0, file_mmap_name.characters());
if (MAP_FAILED == m_file_mapping) {
m_valid = false;
return;
}
m_image = AK::make<ELFImage>((u8*)m_file_mapping);
m_valid = m_image->is_valid() && m_image->parse() && m_image->is_dynamic();
if (!m_valid) {
return;
}
const ELFImage::DynamicSection probably_dynamic_section = m_image->dynamic_section();
if (StringView(".dynamic") != probably_dynamic_section.name() || probably_dynamic_section.type() != SHT_DYNAMIC) {
m_valid = false;
return;
}
} }
ELFDynamicObject::~ELFDynamicObject() ELFDynamicObject::~ELFDynamicObject()
{ {
if (MAP_FAILED != m_file_mapping)
munmap(m_file_mapping, m_file_size);
} }
void* ELFDynamicObject::symbol_for_name(const char* name) void ELFDynamicObject::dump() const
{ {
// FIXME: If we enable gnu hash in the compiler, we should use that here instead
// The algo is way better with less collisions
uint32_t hash_value = calculate_elf_hash(name);
u8* load_addr = m_text_region->load_address().as_ptr();
// NOTE: We need to use the loaded hash/string/symbol tables here to get the right
// addresses. The ones that are in the ELFImage won't cut it, they aren't relocated
u32* hash_table_begin = (u32*)(load_addr + m_hash_table_offset);
Elf32_Sym* symtab = (Elf32_Sym*)(load_addr + m_symbol_table_offset);
const char* strtab = (const char*)load_addr + m_string_table_offset;
size_t num_buckets = hash_table_begin[0];
// This is here for completeness, but, since we're using the fact that every chain
// will end at chain 0 (which means 'not found'), we don't need to check num_chains.
// Interestingly, num_chains is required to be num_symbols
//size_t num_chains = hash_table_begin[1];
u32* buckets = &hash_table_begin[2];
u32* chains = &buckets[num_buckets];
for (u32 i = buckets[hash_value % num_buckets]; i; i = chains[i]) {
if (strcmp(name, strtab + symtab[i].st_name) == 0) {
void* symbol_address = load_addr + symtab[i].st_value;
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Returning dynamic symbol with index %d for %s: %p\n", i, strtab + symtab[i].st_name, symbol_address);
#endif
return symbol_address;
}
}
return nullptr;
}
void ELFDynamicObject::dump()
{
auto dynamic_section = m_image->dynamic_section();
StringBuilder builder; StringBuilder builder;
builder.append("\nd_tag tag_name value\n"); builder.append("\nd_tag tag_name value\n");
size_t num_dynamic_sections = 0; size_t num_dynamic_sections = 0;
dynamic_section.for_each_dynamic_entry([&](const ELFImage::DynamicSectionEntry& entry) { for_each_dynamic_entry([&](const ELFDynamicObject::DynamicEntry& entry) {
String name_field = String::format("(%s)", name_for_dtag(entry.tag())); String name_field = String::format("(%s)", name_for_dtag(entry.tag()));
builder.appendf("0x%08X %-17s0x%X\n", entry.tag(), name_field.characters(), entry.val()); builder.appendf("0x%08X %-17s0x%X\n", entry.tag(), name_field.characters(), entry.val());
num_dynamic_sections++; num_dynamic_sections++;
return IterationDecision::Continue; return IterationDecision::Continue;
}); });
dbgprintf("Dynamic section at offset 0x%x contains %zu entries:\n", dynamic_section.offset(), num_dynamic_sections); dbgprintf("Dynamic section at offset 0x%x contains %zu entries:\n", m_dynamic_offset, num_dynamic_sections);
dbgprintf(builder.to_string().characters()); dbgprintf(builder.to_string().characters());
} }
void ELFDynamicObject::parse_dynamic_section() void ELFDynamicObject::parse()
{ {
auto dynamic_section = m_image->dynamic_section(); for_each_dynamic_entry([&](const DynamicEntry& entry) {
dynamic_section.for_each_dynamic_entry([&](const ELFImage::DynamicSectionEntry& entry) {
switch (entry.tag()) { switch (entry.tag()) {
case DT_INIT: case DT_INIT:
m_init_offset = entry.ptr(); m_init_offset = entry.ptr();
@ -154,6 +52,12 @@ void ELFDynamicObject::parse_dynamic_section()
case DT_INIT_ARRAYSZ: case DT_INIT_ARRAYSZ:
m_init_array_size = entry.val(); m_init_array_size = entry.val();
break; break;
case DT_FINI_ARRAY:
m_fini_array_offset = entry.ptr();
break;
case DT_FINI_ARRAYSZ:
m_fini_array_size = entry.val();
break;
case DT_HASH: case DT_HASH:
m_hash_table_offset = entry.ptr(); m_hash_table_offset = entry.ptr();
break; break;
@ -199,14 +103,10 @@ void ELFDynamicObject::parse_dynamic_section()
m_number_of_relocations = entry.val(); m_number_of_relocations = entry.val();
break; break;
case DT_FLAGS: case DT_FLAGS:
m_must_bind_now = entry.val() & DF_BIND_NOW; m_dt_flags = entry.val();
m_has_text_relocations = entry.val() & DF_TEXTREL;
m_should_process_origin = entry.val() & DF_ORIGIN;
m_has_static_thread_local_storage = entry.val() & DF_STATIC_TLS;
m_requires_symbolic_symbol_resolution = entry.val() & DF_SYMBOLIC;
break; break;
case DT_TEXTREL: case DT_TEXTREL:
m_has_text_relocations = true; // This tag seems to exist for legacy reasons only? m_dt_flags |= DF_TEXTREL; // This tag seems to exist for legacy reasons only?
break; break;
default: default:
dbgprintf("ELFDynamicObject: DYNAMIC tag handling not implemented for DT_%s\n", name_for_dtag(entry.tag())); dbgprintf("ELFDynamicObject: DYNAMIC tag handling not implemented for DT_%s\n", name_for_dtag(entry.tag()));
@ -216,280 +116,130 @@ void ELFDynamicObject::parse_dynamic_section()
} }
return IterationDecision::Continue; return IterationDecision::Continue;
}); });
auto hash_section_address = hash_section().address().as_ptr();
auto num_hash_chains = ((u32*)hash_section_address)[1];
m_symbol_count = num_hash_chains;
} }
typedef void (*InitFunc)(); const ELFDynamicObject::Relocation ELFDynamicObject::RelocationSection::relocation(unsigned index) const
bool ELFDynamicObject::load(unsigned flags)
{ {
ASSERT(flags & RTLD_GLOBAL); ASSERT(index < entry_count());
ASSERT(flags & RTLD_LAZY); unsigned offset_in_section = index * entry_size();
auto relocation_address = (Elf32_Rel*)address().offset(offset_in_section).as_ptr();
return Relocation(m_dynamic, *relocation_address, offset_in_section);
}
#ifdef DYNAMIC_LOAD_DEBUG const ELFDynamicObject::Relocation ELFDynamicObject::RelocationSection::relocation_at_offset(unsigned offset) const
dump(); {
#endif ASSERT(offset <= (m_section_size_bytes - m_entry_size));
#ifdef DYNAMIC_LOAD_VERBOSE auto relocation_address = (Elf32_Rel*)address().offset(offset).as_ptr();
m_image->dump(); return Relocation(m_dynamic, *relocation_address, offset);
#endif }
parse_dynamic_section(); const ELFDynamicObject::Symbol ELFDynamicObject::symbol(unsigned index) const
load_program_headers(); {
auto symbol_section = Section(*this, m_symbol_table_offset, (m_symbol_count * m_size_of_symbol_table_entry), m_size_of_symbol_table_entry, "DT_SYMTAB");
auto symbol_entry = (Elf32_Sym*)symbol_section.address().offset(index * symbol_section.entry_size()).as_ptr();
return Symbol(*this, index, *symbol_entry);
}
if (m_has_text_relocations) { const ELFDynamicObject::Section ELFDynamicObject::init_section() const
if (0 > mprotect(m_text_region->load_address().as_ptr(), m_text_region->required_load_size(), PROT_READ | PROT_WRITE)) { {
perror("mprotect"); // FIXME: dlerror? return Section(*this, m_init_offset, sizeof(void (*)()), sizeof(void (*)()), "DT_INIT");
return false; }
}
const ELFDynamicObject::Section ELFDynamicObject::fini_section() const
{
return Section(*this, m_fini_offset, sizeof(void (*)()), sizeof(void (*)()), "DT_FINI");
}
const ELFDynamicObject::Section ELFDynamicObject::init_array_section() const
{
return Section(*this, m_init_array_offset, m_init_array_size, sizeof(void (*)()), "DT_INIT_ARRAY");
}
const ELFDynamicObject::Section ELFDynamicObject::fini_array_section() const
{
return Section(*this, m_fini_array_offset, m_fini_array_size, sizeof(void (*)()), "DT_FINI_ARRAY");
}
const ELFDynamicObject::HashSection ELFDynamicObject::hash_section() const
{
return HashSection(Section(*this, m_hash_table_offset, 0, 0, "DT_HASH"), HashType::SYSV);
}
const ELFDynamicObject::RelocationSection ELFDynamicObject::relocation_section() const
{
return RelocationSection(Section(*this, m_relocation_table_offset, m_size_of_relocation_table, m_size_of_relocation_entry, "DT_REL"));
}
const ELFDynamicObject::RelocationSection ELFDynamicObject::plt_relocation_section() const
{
return RelocationSection(Section(*this, m_plt_relocation_offset_location, m_size_of_plt_relocation_entry_list, m_size_of_relocation_entry, "DT_JMPREL"));
}
u32 ELFDynamicObject::HashSection::calculate_elf_hash(const char* name) const
{
// SYSV ELF hash algorithm
// Note that the GNU HASH algorithm has less collisions
uint32_t hash = 0;
uint32_t top_nibble_of_hash = 0;
while (*name != '\0') {
hash = hash << 4;
hash += *name;
name++;
top_nibble_of_hash = hash & 0xF0000000U;
if (top_nibble_of_hash != 0)
hash ^= top_nibble_of_hash >> 24;
hash &= ~top_nibble_of_hash;
} }
do_relocations(); return hash;
setup_plt_trampoline(); }
// Clean up our setting of .text to PROT_READ | PROT_WRITE u32 ELFDynamicObject::HashSection::calculate_gnu_hash(const char*) const
if (m_has_text_relocations) { {
if (0 > mprotect(m_text_region->load_address().as_ptr(), m_text_region->required_load_size(), PROT_READ | PROT_EXEC)) { // FIXME: Implement the GNU hash algorithm
perror("mprotect"); // FIXME: dlerror? ASSERT_NOT_REACHED();
return false; }
const ELFDynamicObject::Symbol ELFDynamicObject::HashSection::lookup_symbol(const char* name) const
{
// FIXME: If we enable gnu hash in the compiler, we should use that here instead
// The algo is way better with less collisions
u32 hash_value = (this->*(m_hash_function))(name);
u32* hash_table_begin = (u32*)address().as_ptr();
size_t num_buckets = hash_table_begin[0];
// This is here for completeness, but, since we're using the fact that every chain
// will end at chain 0 (which means 'not found'), we don't need to check num_chains.
// Interestingly, num_chains is required to be num_symbols
//size_t num_chains = hash_table_begin[1];
u32* buckets = &hash_table_begin[2];
u32* chains = &buckets[num_buckets];
for (u32 i = buckets[hash_value % num_buckets]; i; i = chains[i]) {
auto symbol = m_dynamic.symbol(i);
if (strcmp(name, symbol.name()) == 0) {
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Returning dynamic symbol with index %d for %s: %p\n", i, symbol.name(), symbol.address());
#endif
return symbol;
} }
} }
return m_dynamic.the_undefined_symbol();
call_object_init_functions();
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Loaded %s\n", m_filename.characters());
#endif
// FIXME: return false sometimes? missing symbol etc
return true;
} }
// load_program_headers: records every program header of m_image as a ProgramHeaderRegion,
// remembers which regions are text / data / TLS, then maps the text and data segments
// into memory and copies the file contents of the data segment into place.
void ELFDynamicObject::load_program_headers() const char* ELFDynamicObject::symbol_string_table_string(Elf32_Word index) const
{ {
size_t total_required_allocation_size = 0; // NOTE: If we don't have any TEXTREL, we can keep RO data RO, which would be nice return (const char*)base_address().offset(m_string_table_offset + index).as_ptr();
// First pass: copy each header into m_program_header_regions and classify it.
// Only PT_LOAD regions contribute to total_required_allocation_size.
// NOTE(review): m_tls_region / m_text_region / m_data_region point at elements of
// m_program_header_regions while it is still being appended to; if the Vector
// reallocates on a later append these pointers would dangle -- confirm.
m_image->for_each_program_header([&](const ELFImage::ProgramHeader& program_header) {
ProgramHeaderRegion new_region(program_header.raw_header());
if (new_region.is_load())
total_required_allocation_size += new_region.required_load_size();
m_program_header_regions.append(move(new_region));
auto& region = m_program_header_regions.last();
if (region.is_tls_template())
m_tls_region = &region;
else if (region.is_load()) {
// An executable PT_LOAD is treated as text, any other PT_LOAD as data.
if (region.is_executable())
m_text_region = &region;
else
m_data_region = &region;
}
});
ASSERT(m_text_region && m_data_region);
// Process regions in order: .text, .data, .tls
auto* region = m_text_region;
// Text is mapped straight from the image file descriptor at the segment's file offset.
void* text_segment_begin = mmap_with_name(nullptr, region->required_load_size(), region->mmap_prot(), MAP_PRIVATE, m_image_fd, region->offset(), String::format(".text: %s", m_filename.characters()).characters());
size_t text_segment_size = region->required_load_size();
region->set_base_address(VirtualAddress { (u32)text_segment_begin });
region->set_load_address(VirtualAddress { (u32)text_segment_begin });
region = m_data_region;
// Data is an anonymous mapping requested directly after the text mapping; its contents
// are copied in from m_file_mapping below.
void* data_segment_begin = mmap_with_name((u8*)text_segment_begin + text_segment_size, region->required_load_size(), region->mmap_prot(), MAP_ANONYMOUS | MAP_PRIVATE, 0, 0, String::format(".data: %s", m_filename.characters()).characters());
size_t data_segment_size = region->required_load_size();
// The data load address is the header's desired address rebased onto the text mapping.
VirtualAddress data_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin);
region->set_base_address(VirtualAddress { (u32)text_segment_begin });
region->set_load_address(data_segment_actual_addr);
memcpy(data_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image());
if (m_tls_region) {
// NOTE(review): this branch selects m_data_region, not m_tls_region, so it repeats the
// data-segment setup and copy instead of handling the TLS template -- looks like a
// copy/paste slip; confirm the intended region.
region = m_data_region;
VirtualAddress tls_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin);
region->set_base_address(VirtualAddress { (u32)text_segment_begin });
region->set_load_address(tls_segment_actual_addr);
memcpy(tls_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image());
}
// sanity check
// The summed PT_LOAD sizes must equal the span from the start of text to the end of data.
u8* end_of_in_memory_image = (u8*)data_segment_begin + data_segment_size;
ASSERT((ptrdiff_t)total_required_allocation_size == (ptrdiff_t)(end_of_in_memory_image - (u8*)text_segment_begin));
}
// do_relocations: applies every entry of the image's ".rel.dyn" section to the loaded
// image, then processes the PLT relocation list (eagerly via patch_plt_entry, or lazily
// by rebasing each jump slot).
void ELFDynamicObject::do_relocations()
{
auto dyn_relocation_section = m_image->dynamic_relocation_section();
// Only a REL-type section named ".rel.dyn" is supported; anything else aborts.
if (StringView(".rel.dyn") != dyn_relocation_section.name() || SHT_REL != dyn_relocation_section.type()) {
ASSERT_NOT_REACHED();
}
u8* load_base_address = m_text_region->base_address().as_ptr();
// Running relocation number, only used by the VERBOSE trace below.
int i = -1;
// FIXME: We should really bail on undefined symbols here. (but, there's some TLS vars that are currently undef soooo.... :) )
dyn_relocation_section.for_each_relocation([&](const ELFImage::DynamicRelocation& relocation) {
++i;
VERBOSE("====== RELOCATION %d: offset 0x%08X, type %d, symidx %08X\n", i, relocation.offset(), relocation.type(), relocation.symbol_index());
// Location to patch: relocation offset rebased onto the loaded image.
u32* patch_ptr = (u32*)(load_base_address + relocation.offset());
switch (relocation.type()) {
case R_386_NONE:
// Apparently most loaders will just skip these?
// Seems if the 'link editor' generates one something is funky with your code
VERBOSE("None relocation. No symbol, no nothin.\n");
break;
case R_386_32: {
// Adds (symbol value + load base) to the value already stored at the patch location.
auto symbol = relocation.symbol();
VERBOSE("Absolute relocation: name: '%s', value: %p\n", symbol.name(), symbol.value());
u32 symbol_address = symbol.value() + (u32)load_base_address;
*patch_ptr += symbol_address;
VERBOSE(" Symbol address: %p\n", *patch_ptr);
break;
}
case R_386_PC32: {
// Adds (symbol value - relocation offset) to the stored value; the load base is not
// involved in this computation.
auto symbol = relocation.symbol();
VERBOSE("PC-relative relocation: '%s', value: %p\n", symbol.name(), symbol.value());
u32 relative_offset = (symbol.value() - relocation.offset());
*patch_ptr += relative_offset;
VERBOSE(" Symbol address: %p\n", *patch_ptr);
break;
}
case R_386_GLOB_DAT: {
// Overwrites (does not add to) the patch location with load base + symbol value.
auto symbol = relocation.symbol();
VERBOSE("Global data relocation: '%s', value: %p\n", symbol.name(), symbol.value());
u32 symbol_location = (u32)(load_base_address + symbol.value());
*patch_ptr = symbol_location;
VERBOSE(" Symbol address: %p\n", *patch_ptr);
break;
}
case R_386_RELATIVE: {
// FIXME: According to the spec, R_386_relative ones must be done first.
// We could explicitly do them first using m_number_of_relocations from DT_RELCOUNT
// However, our compiler is nice enough to put them at the front of the relocations for us :)
VERBOSE("Load address relocation at offset %X\n", relocation.offset());
VERBOSE(" patch ptr == %p, adding load base address (%p) to it and storing %p\n", *patch_ptr, load_base_address, *patch_ptr + (u32)load_base_address);
*patch_ptr += (u32)load_base_address; // + addend for RelA (addend for Rel is stored at addr)
break;
}
case R_386_TLS_TPOFF: {
VERBOSE("Relocation type: R_386_TLS_TPOFF at offset %X\n", relocation.offset());
// FIXME: this can't be right? I have no idea what "negative offset into TLS storage" means...
// FIXME: Check m_has_static_tls and do something different for dynamic TLS
// NOTE(review): m_tls_region is dereferenced without a null check here -- confirm a TLS
// region always exists when this relocation type appears.
VirtualAddress tls_region_loctation = m_tls_region->desired_load_address();
*patch_ptr = relocation.offset() - (u32)tls_region_loctation.as_ptr() - *patch_ptr;
break;
}
default:
// Raise the alarm! Someone needs to implement this relocation type
dbgprintf("Found a new exciting relocation type %d\n", relocation.type());
printf("ELFDynamicObject: Found unknown relocation type %d\n", relocation.type());
ASSERT_NOT_REACHED();
break;
}
return IterationDecision::Continue;
});
// Handle PLT Global offset table relocations.
// idx is a byte offset into the PLT relocation list, advanced one entry at a time.
for (size_t idx = 0; idx < m_size_of_plt_relocation_entry_list; idx += m_size_of_relocation_entry) {
// FIXME: Or BIND_NOW flag passed in?
if (m_must_bind_now || s_always_bind_now) {
// Eagerly BIND_NOW the PLT entries, doing all the symbol looking goodness
// The patch method returns the address for the LAZY fixup path, but we don't need it here
(void)patch_plt_entry(idx);
} else {
// LAZY-ily bind the PLT slots by just adding the base address to the offsets stored there
// This avoids doing symbol lookup, which might be expensive
VirtualAddress relocation_vaddr = m_text_region->load_address().offset(m_plt_relocation_offset_location).offset(idx);
Elf32_Rel* jump_slot_relocation = (Elf32_Rel*)relocation_vaddr.as_ptr();
ASSERT(ELF32_R_TYPE(jump_slot_relocation->r_info) == R_386_JMP_SLOT);
auto* image_base_address = m_text_region->base_address().as_ptr();
u8* relocation_address = image_base_address + jump_slot_relocation->r_offset;
*(u32*)relocation_address += (u32)image_base_address;
}
}
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Done relocating!\n");
#endif
}
// Defined in <arch>/plt_trampoline.S
extern "C" void _plt_trampoline(void) __attribute__((visibility("hidden")));
void ELFDynamicObject::setup_plt_trampoline()
{
const ELFImage::Section& got_section = m_image->lookup_section(".got.plt");
VirtualAddress got_address = m_text_region->load_address().offset(got_section.address());
u32* got_u32_ptr = reinterpret_cast<u32*>(got_address.as_ptr());
got_u32_ptr[1] = (u32)this;
got_u32_ptr[2] = (u32)&_plt_trampoline;
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Set GOT PLT entries at %p offset(%p): [0] = %p [1] = %p, [2] = %p\n", got_u32_ptr, got_section.offset(), got_u32_ptr[0], got_u32_ptr[1], got_u32_ptr[2]);
#endif
}
// C-linkage entry point called from the assembly routine _plt_trampoline.
// Forwards to ELFDynamicObject::patch_plt_entry on the given object and hands the
// address it returns back to the caller.
extern "C" Elf32_Addr _fixup_plt_entry(ELFDynamicObject* object, u32 relocation_idx)
{
    const Elf32_Addr resolved_address = object->patch_plt_entry(relocation_idx);
    return resolved_address;
}
// Resolves one PLT jump slot.
// relocation_idx is a byte offset into the PLT relocation table. The Elf32_Rel found
// there must be of type R_386_JMP_SLOT. The slot it names is overwritten with the
// rebased address of the referenced dynamic symbol, and that address is returned.
Elf32_Addr ELFDynamicObject::patch_plt_entry(u32 relocation_idx)
{
    // Locate the relocation record inside the loaded image.
    VirtualAddress entry_vaddr = m_text_region->load_address().offset(m_plt_relocation_offset_location).offset(relocation_idx);
    Elf32_Rel* rel = (Elf32_Rel*)entry_vaddr.as_ptr();
    ASSERT(ELF32_R_TYPE(rel->r_info) == R_386_JMP_SLOT);

    auto target_symbol = m_image->dynamic_symbol(ELF32_R_SYM(rel->r_info));

    // Both the slot and the symbol value are offsets relative to the image base.
    auto* base = m_text_region->base_address().as_ptr();
    u8* slot = base + rel->r_offset;
    u32 resolved = (u32)(base + target_symbol.value());

    VERBOSE("ELFDynamicObject: Jump slot relocation: putting %s (%p) into PLT at %p\n", target_symbol.name(), resolved, slot);

    *(u32*)slot = resolved;
    return resolved;
}
void ELFDynamicObject::call_object_init_functions()
{
u8* load_addr = m_text_region->load_address().as_ptr();
InitFunc init_function = (InitFunc)(load_addr + m_init_offset);
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Calling DT_INIT at %p\n", init_function);
#endif
(init_function)();
InitFunc* init_begin = (InitFunc*)(load_addr + m_init_array_offset);
u32 init_end = (u32)((u8*)init_begin + m_init_array_size);
while ((u32)init_begin < init_end) {
// Android sources claim that these can be -1, to be ignored.
// 0 definitely shows up. Apparently 0/-1 are valid? Confusing.
if (!*init_begin || ((i32)*init_begin == -1))
continue;
#ifdef DYNAMIC_LOAD_DEBUG
dbgprintf("Calling DT_INITARRAY entry at %p\n", *init_begin);
#endif
(*init_begin)();
++init_begin;
}
}
// Translates this region's ELF permission flags into mmap protection bits:
// executable -> PROT_EXEC, readable -> PROT_READ, writable -> PROT_WRITE.
// Returns 0 when none of the three flags is set.
u32 ELFDynamicObject::ProgramHeaderRegion::mmap_prot() const
{
int prot = 0;
prot |= is_executable() ? PROT_EXEC : 0;
prot |= is_readable() ? PROT_READ : 0;
prot |= is_writable() ? PROT_WRITE : 0;
return prot;
} }
static const char* name_for_dtag(Elf32_Sword d_tag) static const char* name_for_dtag(Elf32_Sword d_tag)

View file

@ -1,94 +1,205 @@
#pragma once #pragma once
#include <LibELF/ELFImage.h>
#include <LibELF/exec_elf.h> #include <LibELF/exec_elf.h>
#include <dlfcn.h>
#include <mman.h>
#include <AK/OwnPtr.h> #include <Kernel/VM/VirtualAddress.h>
#include <AK/RefCounted.h>
#include <AK/String.h>
#define ALIGN_ROUND_UP(x, align) ((((size_t)(x)) + align - 1) & (~(align - 1))) class ELFDynamicObject {
class ELFDynamicObject : public RefCounted<ELFDynamicObject> {
public: public:
static NonnullRefPtr<ELFDynamicObject> construct(const char* filename, int fd, size_t file_size); explicit ELFDynamicObject(VirtualAddress base_address, u32 dynamic_offset);
~ELFDynamicObject(); ~ELFDynamicObject();
void dump() const;
bool is_valid() const { return m_valid; } class DynamicEntry;
class Section;
class RelocationSection;
class Symbol;
class Relocation;
class HashSection;
// FIXME: How can we resolve all of the symbols without having the original elf image for our process? class DynamicEntry {
// RTLD_LAZY only at first probably... though variables ('objects') need resolved at load time every time
bool load(unsigned flags);
// Intended for use by dlsym or other internal methods
void* symbol_for_name(const char*);
void dump();
// Will be called from _fixup_plt_entry, as part of the PLT trampoline
Elf32_Addr patch_plt_entry(u32 relocation_offset);
private:
class ProgramHeaderRegion {
public: public:
ProgramHeaderRegion(const Elf32_Phdr& header) DynamicEntry(const Elf32_Dyn& dyn)
: m_program_header(header) : m_dyn(dyn)
{ {
} }
VirtualAddress load_address() const { return m_load_address; } ~DynamicEntry() {}
VirtualAddress base_address() const { return m_image_base_address; }
void set_load_address(VirtualAddress addr) { m_load_address = addr; } Elf32_Sword tag() const { return m_dyn.d_tag; }
void set_base_address(VirtualAddress addr) { m_image_base_address = addr; } Elf32_Addr ptr() const { return m_dyn.d_un.d_ptr; }
Elf32_Word val() const { return m_dyn.d_un.d_val; }
// Information from ELF Program header
u32 type() const { return m_program_header.p_type; }
u32 flags() const { return m_program_header.p_flags; }
u32 offset() const { return m_program_header.p_offset; }
VirtualAddress desired_load_address() const { return VirtualAddress(m_program_header.p_vaddr); }
u32 size_in_memory() const { return m_program_header.p_memsz; }
u32 size_in_image() const { return m_program_header.p_filesz; }
u32 alignment() const { return m_program_header.p_align; }
u32 mmap_prot() const;
bool is_readable() const { return flags() & PF_R; }
bool is_writable() const { return flags() & PF_W; }
bool is_executable() const { return flags() & PF_X; }
bool is_tls_template() const { return type() == PT_TLS; }
bool is_load() const { return type() == PT_LOAD; }
bool is_dynamic() const { return type() == PT_DYNAMIC; }
u32 required_load_size() { return ALIGN_ROUND_UP(m_program_header.p_memsz, m_program_header.p_align); }
private: private:
Elf32_Phdr m_program_header; // Explictly a copy of the PHDR in the image const Elf32_Dyn& m_dyn;
VirtualAddress m_load_address { 0 };
VirtualAddress m_image_base_address { 0 };
}; };
explicit ELFDynamicObject(const char* filename, int fd, size_t file_size); class Symbol {
public:
Symbol(const ELFDynamicObject& dynamic, unsigned index, const Elf32_Sym& sym)
: m_dynamic(dynamic)
, m_sym(sym)
, m_index(index)
{
}
void parse_dynamic_section(); ~Symbol() {}
void load_program_headers();
void do_relocations();
void setup_plt_trampoline();
void call_object_init_functions();
String m_filename; const char* name() const { return m_dynamic.symbol_string_table_string(m_sym.st_name); }
size_t m_file_size { 0 }; unsigned section_index() const { return m_sym.st_shndx; }
int m_image_fd { -1 }; unsigned value() const { return m_sym.st_value; }
void* m_file_mapping { nullptr }; unsigned size() const { return m_sym.st_size; }
bool m_valid { false }; unsigned index() const { return m_index; }
unsigned type() const { return ELF32_ST_TYPE(m_sym.st_info); }
unsigned bind() const { return ELF32_ST_BIND(m_sym.st_info); }
bool is_undefined() const { return this == &m_dynamic.the_undefined_symbol(); }
VirtualAddress address() const { return m_dynamic.base_address().offset(value()); }
OwnPtr<ELFImage> m_image; private:
const ELFDynamicObject& m_dynamic;
const Elf32_Sym& m_sym;
const unsigned m_index;
};
Vector<ProgramHeaderRegion> m_program_header_regions; class Section {
ProgramHeaderRegion* m_text_region { nullptr }; public:
ProgramHeaderRegion* m_data_region { nullptr }; Section(const ELFDynamicObject& dynamic, unsigned section_offset, unsigned section_size_bytes, unsigned entry_size, const char* name)
ProgramHeaderRegion* m_tls_region { nullptr }; : m_dynamic(dynamic)
, m_section_offset(section_offset)
, m_section_size_bytes(section_size_bytes)
, m_entry_size(entry_size)
, m_name(name)
{
}
~Section() {}
const char* name() const { return m_name; }
unsigned offset() const { return m_section_offset; }
unsigned size() const { return m_section_size_bytes; }
unsigned entry_size() const { return m_entry_size; }
unsigned entry_count() const { return !entry_size() ? 0 : size() / entry_size(); }
VirtualAddress address() const { return m_dynamic.base_address().offset(m_section_offset); }
protected:
friend class RelocationSection;
friend class HashSection;
const ELFDynamicObject& m_dynamic;
unsigned m_section_offset;
unsigned m_section_size_bytes;
unsigned m_entry_size;
const char* m_name { nullptr };
};
// A Section viewed as a table of relocation records.
// It is built by copying the descriptive fields (owner, offset, size, entry size, name)
// of an existing Section; it adds no data members of its own.
class RelocationSection : public Section {
public:
RelocationSection(const Section& section)
: Section(section.m_dynamic, section.m_section_offset, section.m_section_size_bytes, section.m_entry_size, section.m_name)
{
}
// Number of relocation records; identical to the section's entry_count().
unsigned relocation_count() const { return entry_count(); }
// Accessors for a single record, by index or by offset (defined out of line).
const Relocation relocation(unsigned index) const;
const Relocation relocation_at_offset(unsigned offset) const;
// Calls the functor with relocation(i) for i in [0, relocation_count()) and stops
// early when it returns IterationDecision::Break.
template<typename F>
void for_each_relocation(F) const;
};
// Lightweight view of one Elf32_Rel record belonging to an ELFDynamicObject.
// Both the owning object and the raw record are held by reference, not copied, so
// they must outlive this Relocation.
class Relocation {
public:
Relocation(const ELFDynamicObject& dynamic, const Elf32_Rel& rel, unsigned offset_in_section)
: m_dynamic(dynamic)
, m_rel(rel)
, m_offset_in_section(offset_in_section)
{
}
~Relocation() {}
// The offset_in_section value supplied at construction.
unsigned offset_in_section() const { return m_offset_in_section; }
// Raw r_offset field of the record.
unsigned offset() const { return m_rel.r_offset; }
// Relocation type and symbol index, both decoded from r_info.
unsigned type() const { return ELF32_R_TYPE(m_rel.r_info); }
unsigned symbol_index() const { return ELF32_R_SYM(m_rel.r_info); }
// The symbol this relocation refers to, looked up in the owning object.
const Symbol symbol() const { return m_dynamic.symbol(symbol_index()); }
// r_offset rebased onto the owning object's base address.
VirtualAddress address() const { return m_dynamic.base_address().offset(offset()); }
private:
const ELFDynamicObject& m_dynamic;
const Elf32_Rel& m_rel;
const unsigned m_offset_in_section;
};
// Selects which hash function a HashSection uses for symbol lookup:
// SYSV picks calculate_elf_hash, GNU picks calculate_gnu_hash.
enum class HashType {
SYSV,
GNU
};
class HashSection : public Section {
public:
HashSection(const Section& section, HashType hash_type = HashType::SYSV)
: Section(section.m_dynamic, section.m_section_offset, section.m_section_size_bytes, section.m_entry_size, section.m_name)
{
switch (hash_type) {
case HashType::SYSV:
m_hash_function = &HashSection::calculate_elf_hash;
break;
case HashType::GNU:
m_hash_function = &HashSection::calculate_gnu_hash;
break;
default:
ASSERT_NOT_REACHED();
break;
}
}
const Symbol lookup_symbol(const char*) const;
private:
u32 calculate_elf_hash(const char* name) const;
u32 calculate_gnu_hash(const char* name) const;
typedef u32 (HashSection::*HashFunction)(const char*) const;
HashFunction m_hash_function;
};
// Number of symbols recorded in m_symbol_count.
unsigned symbol_count() const { return m_symbol_count; }
// Returns the symbol at the given index by value.
const Symbol symbol(unsigned) const;
// NOTE(review): Symbol::is_undefined() compares `this` against the address of this member,
// while symbol() returns copies -- a copy would never compare equal; confirm intent.
const Symbol& the_undefined_symbol() const { return m_the_undefined_symbol; }
// Section views built from the offsets/sizes collected from the DT_* entries (defined out of line).
const Section init_section() const;
const Section fini_section() const;
const Section init_array_section() const;
const Section fini_array_section() const;
const HashSection hash_section() const;
const RelocationSection relocation_section() const;
const RelocationSection plt_relocation_section() const;
// Each predicate below tests a single DF_* bit in m_dt_flags.
bool should_process_origin() const { return m_dt_flags & DF_ORIGIN; }
bool requires_symbolic_symbol_resolution() const { return m_dt_flags & DF_SYMBOLIC; }
// Text relocations meaning: we need to edit the .text section which is normally mapped PROT_READ
bool has_text_relocations() const { return m_dt_flags & DF_TEXTREL; }
bool must_bind_now() const { return m_dt_flags & DF_BIND_NOW; }
bool has_static_thread_local_storage() const { return m_dt_flags & DF_STATIC_TLS; }
// Base address plus m_procedure_linkage_table_offset.
VirtualAddress plt_got_base_address() const { return m_base_address.offset(m_procedure_linkage_table_offset); }
VirtualAddress base_address() const { return m_base_address; }
private:
// String at the given index of the dynamic string table (used by Symbol::name()).
const char* symbol_string_table_string(Elf32_Word) const;
void parse();
template<typename F>
void for_each_symbol(F) const;
template<typename F>
void for_each_dynamic_entry(F) const;
// Address everything else in this object is an offset from.
VirtualAddress m_base_address;
// Offset from m_base_address to the table of Elf32_Dyn entries.
u32 m_dynamic_offset;
// NOTE(review): Symbol keeps its Elf32_Sym as `const Elf32_Sym&`; the `{}` passed here is a
// temporary, so that reference may dangle once construction finishes -- confirm.
Symbol m_the_undefined_symbol { *this, 0, {} };
unsigned m_symbol_count { 0 };
// Begin Section information collected from DT_* entries // Begin Section information collected from DT_* entries
uintptr_t m_init_offset { 0 }; uintptr_t m_init_offset { 0 };
@ -96,12 +207,14 @@ private:
uintptr_t m_init_array_offset { 0 }; uintptr_t m_init_array_offset { 0 };
size_t m_init_array_size { 0 }; size_t m_init_array_size { 0 };
uintptr_t m_fini_array_offset { 0 };
size_t m_fini_array_size { 0 };
uintptr_t m_hash_table_offset { 0 }; uintptr_t m_hash_table_offset { 0 };
uintptr_t m_string_table_offset { 0 }; uintptr_t m_string_table_offset { 0 };
uintptr_t m_symbol_table_offset { 0 };
size_t m_size_of_string_table { 0 }; size_t m_size_of_string_table { 0 };
uintptr_t m_symbol_table_offset { 0 };
size_t m_size_of_symbol_table_entry { 0 }; size_t m_size_of_symbol_table_entry { 0 };
Elf32_Sword m_procedure_linkage_table_relocation_type { -1 }; Elf32_Sword m_procedure_linkage_table_relocation_type { -1 };
@ -110,17 +223,44 @@ private:
uintptr_t m_procedure_linkage_table_offset { 0 }; uintptr_t m_procedure_linkage_table_offset { 0 };
// NOTE: We'll only ever have either RELA or REL entries, not both (thank god) // NOTE: We'll only ever have either RELA or REL entries, not both (thank god)
// NOTE: The x86 ABI will only ever generate REL entries.
size_t m_number_of_relocations { 0 }; size_t m_number_of_relocations { 0 };
size_t m_size_of_relocation_entry { 0 }; size_t m_size_of_relocation_entry { 0 };
size_t m_size_of_relocation_table { 0 }; size_t m_size_of_relocation_table { 0 };
uintptr_t m_relocation_table_offset { 0 }; uintptr_t m_relocation_table_offset { 0 };
// DT_FLAGS // DT_FLAGS
bool m_should_process_origin = false; Elf32_Word m_dt_flags { 0 };
bool m_requires_symbolic_symbol_resolution = false;
// Text relocations meaning: we need to edit the .text section which is normally mapped PROT_READ
bool m_has_text_relocations = false;
bool m_must_bind_now = false; // FIXME: control with an environment var as well?
bool m_has_static_thread_local_storage = false;
// End Section information from DT_* entries // End Section information from DT_* entries
}; };
// Visits the relocations of this section in index order.
// The functor receives relocation(i); returning IterationDecision::Break ends the walk.
template<typename F>
inline void ELFDynamicObject::RelocationSection::for_each_relocation(F func) const
{
    unsigned index = 0;
    while (index < relocation_count()) {
        if (func(relocation(index)) == IterationDecision::Break)
            return;
        ++index;
    }
}
// Visits the symbols of this object in index order, from 0 up to symbol_count().
// The functor receives symbol(i); returning IterationDecision::Break ends the walk.
template<typename F>
inline void ELFDynamicObject::for_each_symbol(F func) const
{
    unsigned index = 0;
    while (index < symbol_count()) {
        if (func(symbol(index)) == IterationDecision::Break)
            return;
        ++index;
    }
}
template<typename F>
inline void ELFDynamicObject::for_each_dynamic_entry(F func) const
{
auto* dyns = reinterpret_cast<const Elf32_Dyn*>(m_base_address.offset(m_dynamic_offset).as_ptr());
for (unsigned i = 0;; ++i) {
auto&& dyn = DynamicEntry(dyns[i]);
if (dyn.tag() == DT_NULL)
break;
if (func(dyn) == IterationDecision::Break)
break;
}
}

View file

@ -43,11 +43,6 @@ unsigned ELFImage::symbol_count() const
return section(m_symbol_table_section_index).entry_count(); return section(m_symbol_table_section_index).entry_count();
} }
unsigned ELFImage::dynamic_symbol_count() const
{
return section(m_dynamic_symbol_table_section_index).entry_count();
}
void ELFImage::dump() const void ELFImage::dump() const
{ {
dbgprintf("ELFImage{%p} {\n", this); dbgprintf("ELFImage{%p} {\n", this);
@ -117,24 +112,11 @@ bool ELFImage::parse()
if (sh.sh_type == SHT_STRTAB && i != header().e_shstrndx) { if (sh.sh_type == SHT_STRTAB && i != header().e_shstrndx) {
if (StringView(".strtab") == section_header_table_string(sh.sh_name)) if (StringView(".strtab") == section_header_table_string(sh.sh_name))
m_string_table_section_index = i; m_string_table_section_index = i;
else if (StringView(".dynstr") == section_header_table_string(sh.sh_name))
m_dynamic_string_table_section_index = i;
else
ASSERT_NOT_REACHED();
} }
if (sh.sh_type == SHT_DYNAMIC) { if (sh.sh_type == SHT_DYNAMIC) {
ASSERT(!m_dynamic_section_index || m_dynamic_section_index == i); ASSERT(!m_dynamic_section_index || m_dynamic_section_index == i);
m_dynamic_section_index = i; m_dynamic_section_index = i;
} }
if (sh.sh_type == SHT_DYNSYM) {
ASSERT(!m_dynamic_symbol_table_section_index || m_dynamic_symbol_table_section_index == i);
m_dynamic_symbol_table_section_index = i;
}
if (sh.sh_type == SHT_REL) {
if (StringView(".rel.dyn") == section_header_table_string(sh.sh_name)) {
m_dynamic_relocation_section_index = i;
}
}
} }
// Then create a name-to-index map. // Then create a name-to-index map.
@ -162,14 +144,6 @@ const char* ELFImage::table_string(unsigned offset) const
return raw_data(sh.sh_offset + offset); return raw_data(sh.sh_offset + offset);
} }
// Returns a pointer to the string at `offset` inside the section recorded as the dynamic
// string table, or nullptr when that section is not of type SHT_STRTAB.
const char* ELFImage::dynamic_table_string(unsigned offset) const
{
    auto& dynstr_header = section_header(m_dynamic_string_table_section_index);
    const bool is_string_table = dynstr_header.sh_type == SHT_STRTAB;
    return is_string_table ? raw_data(dynstr_header.sh_offset + offset) : nullptr;
}
const char* ELFImage::raw_data(unsigned offset) const const char* ELFImage::raw_data(unsigned offset) const
{ {
return reinterpret_cast<const char*>(m_buffer) + offset; return reinterpret_cast<const char*>(m_buffer) + offset;
@ -199,13 +173,6 @@ const ELFImage::Symbol ELFImage::symbol(unsigned index) const
return Symbol(*this, index, raw_syms[index]); return Symbol(*this, index, raw_syms[index]);
} }
// Returns a view of the dynamic symbol at `index`.
// The index is validated against the dynamic symbol table's own entry count; the
// regular symbol table (symbol_count()) is a different section with a different size.
const ELFImage::DynamicSymbol ELFImage::dynamic_symbol(unsigned index) const
{
    ASSERT(index < dynamic_symbol_count());
    auto* raw_syms = reinterpret_cast<const Elf32_Sym*>(raw_data(section(m_dynamic_symbol_table_section_index).offset()));
    return DynamicSymbol(*this, index, raw_syms[index]);
}
const ELFImage::Section ELFImage::section(unsigned index) const const ELFImage::Section ELFImage::section(unsigned index) const
{ {
ASSERT(index < section_count()); ASSERT(index < section_count());
@ -225,13 +192,6 @@ const ELFImage::Relocation ELFImage::RelocationSection::relocation(unsigned inde
return Relocation(m_image, rels[index]); return Relocation(m_image, rels[index]);
} }
// Returns a view of the relocation record at `index` in this section.
// Asserts that the index is below relocation_count().
const ELFImage::DynamicRelocation ELFImage::DynamicRelocationSection::relocation(unsigned index) const
{
    ASSERT(index < relocation_count());
    const char* section_bytes = m_image.raw_data(offset());
    const auto& raw_relocation = reinterpret_cast<const Elf32_Rel*>(section_bytes)[index];
    return DynamicRelocation(m_image, raw_relocation);
}
const ELFImage::RelocationSection ELFImage::Section::relocations() const const ELFImage::RelocationSection ELFImage::Section::relocations() const
{ {
// FIXME: This is ugly. // FIXME: This is ugly.
@ -263,9 +223,3 @@ const ELFImage::DynamicSection ELFImage::dynamic_section() const
ASSERT(is_dynamic()); ASSERT(is_dynamic());
return section(m_dynamic_section_index); return section(m_dynamic_section_index);
} }
// Returns the section recorded as the dynamic relocation section.
// Asserts that the image is dynamic.
const ELFImage::DynamicRelocationSection ELFImage::dynamic_relocation_section() const
{
    ASSERT(is_dynamic());
    const Section rel_dyn = section(m_dynamic_relocation_section_index);
    return rel_dyn;
}

View file

@ -16,13 +16,9 @@ public:
class Section; class Section;
class RelocationSection; class RelocationSection;
class DynamicRelocationSection;
class Symbol; class Symbol;
class DynamicSymbol;
class Relocation; class Relocation;
class DynamicRelocation;
class DynamicSection; class DynamicSection;
class DynamicSectionEntry;
class Symbol { class Symbol {
public: public:
@ -50,32 +46,6 @@ public:
const unsigned m_index; const unsigned m_index;
}; };
// Lightweight view of one Elf32_Sym from the image's dynamic symbol table.
// The image and the raw symbol are held by reference, so both must outlive this object.
class DynamicSymbol {
public:
DynamicSymbol(const ELFImage& image, unsigned index, const Elf32_Sym& sym)
: m_image(image)
, m_sym(sym)
, m_index(index)
{
}
~DynamicSymbol() {}
// Name resolved through the image's dynamic string table using st_name.
const char* name() const { return m_image.dynamic_table_string(m_sym.st_name); }
// Raw fields of the symbol record.
unsigned section_index() const { return m_sym.st_shndx; }
unsigned value() const { return m_sym.st_value; }
unsigned size() const { return m_sym.st_size; }
// The index supplied at construction.
unsigned index() const { return m_index; }
// Type and binding, both decoded from st_info.
unsigned type() const { return ELF32_ST_TYPE(m_sym.st_info); }
unsigned bind() const { return ELF32_ST_BIND(m_sym.st_info); }
// The image section selected by st_shndx.
const Section section() const { return m_image.section(section_index()); }
private:
const ELFImage& m_image;
const Elf32_Sym& m_sym;
const unsigned m_index;
};
class ProgramHeader { class ProgramHeader {
public: public:
ProgramHeader(const ELFImage& image, unsigned program_header_index) ProgramHeader(const ELFImage& image, unsigned program_header_index)
@ -151,38 +121,6 @@ public:
void for_each_relocation(F) const; void for_each_relocation(F) const;
}; };
// A Section viewed as a table of dynamic relocation records.
// Built from an existing Section by reusing its image and section index.
class DynamicRelocationSection : public Section {
public:
DynamicRelocationSection(const Section& section)
: Section(section.m_image, section.m_section_index)
{
}
// Number of relocation records; identical to the section's entry_count().
unsigned relocation_count() const { return entry_count(); }
// Record at the given index (defined out of line).
const DynamicRelocation relocation(unsigned index) const;
// Calls the functor for each relocation in index order.
template<typename F>
void for_each_relocation(F) const;
};
// Lightweight view of one Elf32_Rel record from the image's dynamic relocation section.
// The image and the raw record are held by reference, so both must outlive this object.
class DynamicRelocation {
public:
DynamicRelocation(const ELFImage& image, const Elf32_Rel& rel)
: m_image(image)
, m_rel(rel)
{
}
~DynamicRelocation() {}
// Raw r_offset field of the record.
unsigned offset() const { return m_rel.r_offset; }
// Relocation type and symbol index, both decoded from r_info.
unsigned type() const { return ELF32_R_TYPE(m_rel.r_info); }
unsigned symbol_index() const { return ELF32_R_SYM(m_rel.r_info); }
// The dynamic symbol this relocation refers to.
const DynamicSymbol symbol() const { return m_image.dynamic_symbol(symbol_index()); }
private:
const ELFImage& m_image;
const Elf32_Rel& m_rel;
};
class Relocation { class Relocation {
public: public:
Relocation(const ELFImage& image, const Elf32_Rel& rel) Relocation(const ELFImage& image, const Elf32_Rel& rel)
@ -210,28 +148,6 @@ public:
{ {
ASSERT(type() == SHT_DYNAMIC); ASSERT(type() == SHT_DYNAMIC);
} }
template<typename F>
void for_each_dynamic_entry(F) const;
};
// Lightweight view of one Elf32_Dyn record from the image's dynamic section.
// The image and the raw record are held by reference, so both must outlive this object.
class DynamicSectionEntry {
public:
DynamicSectionEntry(const ELFImage& image, const Elf32_Dyn& dyn)
: m_image(image)
, m_dyn(dyn)
{
}
~DynamicSectionEntry() {}
// d_tag identifies the entry.
Elf32_Sword tag() const { return m_dyn.d_tag; }
// ptr() and val() read the two members of the d_un union.
Elf32_Addr ptr() const { return m_dyn.d_un.d_ptr; }
Elf32_Word val() const { return m_dyn.d_un.d_val; }
private:
// Stored but not read by any accessor in this class.
const ELFImage& m_image;
const Elf32_Dyn& m_dyn;
}; };
unsigned symbol_count() const; unsigned symbol_count() const;
@ -240,11 +156,9 @@ public:
unsigned program_header_count() const; unsigned program_header_count() const;
const Symbol symbol(unsigned) const; const Symbol symbol(unsigned) const;
const DynamicSymbol dynamic_symbol(unsigned) const;
const Section section(unsigned) const; const Section section(unsigned) const;
const ProgramHeader program_header(unsigned const) const; const ProgramHeader program_header(unsigned const) const;
const DynamicSection dynamic_section() const; const DynamicSection dynamic_section() const;
const DynamicRelocationSection dynamic_relocation_section() const;
template<typename F> template<typename F>
void for_each_section(F) const; void for_each_section(F) const;
@ -253,8 +167,6 @@ public:
template<typename F> template<typename F>
void for_each_symbol(F) const; void for_each_symbol(F) const;
template<typename F> template<typename F>
void for_each_dynamic_symbol(F) const;
template<typename F>
void for_each_program_header(F) const; void for_each_program_header(F) const;
// NOTE: Returns section(0) if section with name is not found. // NOTE: Returns section(0) if section with name is not found.
@ -276,17 +188,13 @@ private:
const char* table_string(unsigned offset) const; const char* table_string(unsigned offset) const;
const char* section_header_table_string(unsigned offset) const; const char* section_header_table_string(unsigned offset) const;
const char* section_index_to_string(unsigned index) const; const char* section_index_to_string(unsigned index) const;
const char* dynamic_table_string(unsigned offset) const;
const u8* m_buffer { nullptr }; const u8* m_buffer { nullptr };
HashMap<String, unsigned> m_sections; HashMap<String, unsigned> m_sections;
bool m_valid { false }; bool m_valid { false };
unsigned m_symbol_table_section_index { 0 }; unsigned m_symbol_table_section_index { 0 };
unsigned m_string_table_section_index { 0 }; unsigned m_string_table_section_index { 0 };
unsigned m_dynamic_symbol_table_section_index { 0 }; // .dynsym unsigned m_dynamic_section_index { 0 };
unsigned m_dynamic_string_table_section_index { 0 }; // .dynstr
unsigned m_dynamic_section_index { 0 }; // .dynamic
unsigned m_dynamic_relocation_section_index { 0 }; // .rel.dyn
}; };
template<typename F> template<typename F>
@ -317,15 +225,6 @@ inline void ELFImage::RelocationSection::for_each_relocation(F func) const
} }
} }
// Visits the relocations of this section in index order.
// The functor receives relocation(i); returning IterationDecision::Break ends the walk.
template<typename F>
inline void ELFImage::DynamicRelocationSection::for_each_relocation(F func) const
{
    unsigned index = 0;
    while (index < relocation_count()) {
        if (func(relocation(index)) == IterationDecision::Break)
            return;
        ++index;
    }
}
template<typename F> template<typename F>
inline void ELFImage::for_each_symbol(F func) const inline void ELFImage::for_each_symbol(F func) const
{ {
@ -335,31 +234,9 @@ inline void ELFImage::for_each_symbol(F func) const
} }
} }
template<typename F>
inline void ELFImage::for_each_dynamic_symbol(F func) const
{
for (unsigned i = 0; i < dynamic_symbol_count(); ++i) {
if (func(symbol(i)) == IterationDecision::Break)
break;
}
}
template<typename F> template<typename F>
inline void ELFImage::for_each_program_header(F func) const inline void ELFImage::for_each_program_header(F func) const
{ {
for (unsigned i = 0; i < program_header_count(); ++i) for (unsigned i = 0; i < program_header_count(); ++i)
func(program_header(i)); func(program_header(i));
} }
template<typename F>
inline void ELFImage::DynamicSection::for_each_dynamic_entry(F func) const
{
auto* dyns = reinterpret_cast<const Elf32_Dyn*>(m_image.raw_data(offset()));
for (unsigned i = 0;; ++i) {
auto&& dyn = DynamicSectionEntry(m_image, dyns[i]);
if (dyn.tag() == DT_NULL)
break;
if (func(dyn) == IterationDecision::Break)
break;
}
}