// SYSV ELF hash algorithm (the hash that DT_HASH tables are keyed on).
// Note that the GNU HASH algorithm has fewer collisions.
//
// name: NUL-terminated symbol name; must not be null.
// Returns the hash value. The top nibble is folded back into the low bits and then
// cleared on every step, so the result always fits in 28 bits.
static uint32_t calculate_elf_hash(const char* name)
{
    uint32_t hash = 0;

    for (; *name != '\0'; ++name) {
        // The hash is defined over unsigned bytes. Going through unsigned char keeps
        // bytes >= 0x80 from being sign-extended on targets where plain char is signed,
        // which would otherwise produce a hash that disagrees with the linker's table.
        hash = (hash << 4) + static_cast<unsigned char>(*name);

        const uint32_t top_nibble_of_hash = hash & 0xF0000000U;
        if (top_nibble_of_hash != 0)
            hash ^= top_nibble_of_hash >> 24;
        hash &= ~top_nibble_of_hash;
    }

    return hash;
}
} +} + +ELFDynamicObject::~ELFDynamicObject() +{ + if (MAP_FAILED != m_file_mapping) + munmap(m_file_mapping, m_file_size); +} + +void ELFDynamicObject::dump() +{ + auto dynamic_section = m_image->dynamic_section(); + + StringBuilder builder; + builder.append("\nd_tag tag_name value\n"); + size_t num_dynamic_sections = 0; + + dynamic_section.for_each_dynamic_entry([&](const ELFImage::DynamicSectionEntry& entry) { + String name_field = String::format("(%s)", name_for_dtag(entry.tag())); + builder.appendf("0x%08X %-17s0x%X\n", entry.tag(), name_field.characters(), entry.val()); + num_dynamic_sections++; + return IterationDecision::Continue; + }); + + dbgprintf("Dynamic section at offset 0x%x contains %zu entries:\n", dynamic_section.offset(), num_dynamic_sections); + dbgprintf(builder.to_string().characters()); +} + +void ELFDynamicObject::parse_dynamic_section() +{ + auto dynamic_section = m_image->dynamic_section(); + dynamic_section.for_each_dynamic_entry([&](const ELFImage::DynamicSectionEntry& entry) { + switch (entry.tag()) { + case DT_INIT: + m_init_offset = entry.ptr(); + break; + case DT_FINI: + m_fini_offset = entry.ptr(); + break; + case DT_INIT_ARRAY: + m_init_array_offset = entry.ptr(); + break; + case DT_INIT_ARRAYSZ: + m_init_array_size = entry.val(); + break; + case DT_HASH: + m_hash_table_offset = entry.ptr(); + break; + case DT_SYMTAB: + m_symbol_table_offset = entry.ptr(); + break; + case DT_STRTAB: + m_string_table_offset = entry.ptr(); + break; + case DT_STRSZ: + m_size_of_string_table = entry.val(); + break; + case DT_SYMENT: + m_size_of_symbol_table_entry = entry.val(); + break; + case DT_PLTGOT: + m_procedure_linkage_table_offset = entry.ptr(); + break; + case DT_PLTRELSZ: + m_size_of_plt_relocation_entry_list = entry.val(); + break; + case DT_PLTREL: + m_procedure_linkage_table_relocation_type = entry.val(); + ASSERT(m_procedure_linkage_table_relocation_type & (DT_REL | DT_RELA)); + break; + case DT_JMPREL: + m_plt_relocation_offset_location 
= entry.ptr(); + break; + case DT_RELA: + case DT_REL: + m_relocation_table_offset = entry.ptr(); + break; + case DT_RELASZ: + case DT_RELSZ: + m_size_of_relocation_table = entry.val(); + break; + case DT_RELAENT: + case DT_RELENT: + m_size_of_relocation_entry = entry.val(); + break; + case DT_RELACOUNT: + case DT_RELCOUNT: + m_number_of_relocations = entry.val(); + break; + case DT_FLAGS: + m_must_bind_now = entry.val() & DF_BIND_NOW; + m_has_text_relocations = entry.val() & DF_TEXTREL; + m_should_process_origin = entry.val() & DF_ORIGIN; + m_has_static_thread_local_storage = entry.val() & DF_STATIC_TLS; + m_requires_symbolic_symbol_resolution = entry.val() & DF_SYMBOLIC; + break; + case DT_TEXTREL: + m_has_text_relocations = true; // This tag seems to exist for legacy reasons only? + break; + default: + dbgprintf("ELFDynamicObject: DYNAMIC tag handling not implemented for DT_%s\n", name_for_dtag(entry.tag())); + printf("ELFDynamicObject: DYNAMIC tag handling not implemented for DT_%s\n", name_for_dtag(entry.tag())); + ASSERT_NOT_REACHED(); // FIXME: Maybe just break out here and return false? + break; + } + return IterationDecision::Continue; + }); +} + +typedef void (*InitFunc)(); + +bool ELFDynamicObject::load(unsigned flags) +{ + ASSERT(flags & RTLD_GLOBAL); + ASSERT(flags & RTLD_LAZY); + +#ifdef DYNAMIC_LOAD_DEBUG + dump(); +#endif +#ifdef DYNAMIC_LOAD_VERBOSE + m_image->dump(); +#endif + + parse_dynamic_section(); + + // FIXME: be more flexible? + size_t total_required_allocation_size = 0; + + // FIXME: Can we re-use ELFLoader? This and what follows looks a lot like what's in there... + // With the exception of using desired_load_address().offset(text_segment_begin) + // It seems kinda gross to expect the program headers to be in a specific order.. 
+ m_image->for_each_program_header([&](const ELFImage::ProgramHeader& program_header) { + ProgramHeaderRegion new_region(program_header.raw_header()); + if (new_region.is_load()) + total_required_allocation_size += new_region.required_load_size(); + m_program_header_regions.append(move(new_region)); + auto& region = m_program_header_regions.last(); + if (region.is_tls_template()) + m_tls_region = ®ion; + else if (region.is_load()) { + if (region.is_executable()) + m_text_region = ®ion; + else + m_data_region = ®ion; + } + }); + + ASSERT(m_text_region && m_data_region); + + // Process regions in order: .text, .data, .tls + auto* region = m_text_region; + void* text_segment_begin = mmap_with_name(nullptr, region->required_load_size(), region->mmap_prot(), MAP_PRIVATE, m_image_fd, region->offset(), String::format(".text: %s", m_filename.characters()).characters()); + size_t text_segment_size = region->required_load_size(); + region->set_base_address(VirtualAddress { (u32)text_segment_begin }); + region->set_load_address(VirtualAddress { (u32)text_segment_begin }); + + region = m_data_region; + void* data_segment_begin = mmap_with_name((u8*)text_segment_begin + text_segment_size, region->required_load_size(), region->mmap_prot(), MAP_ANONYMOUS | MAP_PRIVATE, 0, 0, String::format(".data: %s", m_filename.characters()).characters()); + size_t data_segment_size = region->required_load_size(); + VirtualAddress data_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin); + region->set_base_address(VirtualAddress { (u32)text_segment_begin }); + region->set_load_address(data_segment_actual_addr); + memcpy(data_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image()); + + if (m_tls_region) { + region = m_data_region; + VirtualAddress tls_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin); + region->set_base_address(VirtualAddress { (u32)text_segment_begin }); + 
region->set_load_address(tls_segment_actual_addr); + memcpy(tls_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image()); + } + + // sanity check + u8* end_of_in_memory_image = (u8*)data_segment_begin + data_segment_size; + ASSERT((ptrdiff_t)total_required_allocation_size == (ptrdiff_t)(end_of_in_memory_image - (u8*)text_segment_begin)); + + if (m_has_text_relocations) { + if (0 > mprotect(m_text_region->load_address().as_ptr(), m_text_region->required_load_size(), PROT_READ | PROT_WRITE)) { + perror("mprotect"); // FIXME: dlerror? + return false; + } + } + + do_relocations(); + +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Done relocating!\n"); +#endif + + // FIXME: PLT patching doesn't seem to work as expected. + // Need to dig into the spec to see what we're doing wrong + // Hopefully it won't need an assembly entry point... :/ + /// For now we can just BIND_NOW every time + + // This should be the address of section ".got.plt" + const ELFImage::Section& got_section = m_image->lookup_section(".got.plt"); + VirtualAddress got_address = m_text_region->load_address().offset(got_section.address()); + + u32* got_u32_ptr = reinterpret_cast(got_address.as_ptr()); + got_u32_ptr[1] = (u32)this; + got_u32_ptr[2] = (u32)&ELFDynamicObject::patch_plt_entry; + +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Set GOT PLT entries at %p: [0] = %p [1] = %p, [2] = %p\n", got_u32_ptr, got_u32_ptr[0], got_u32_ptr[1], got_u32_ptr[2]); +#endif + + // Clean up our setting of .text to PROT_READ | PROT_WRITE + if (m_has_text_relocations) { + if (0 > mprotect(m_text_region->load_address().as_ptr(), m_text_region->required_load_size(), PROT_READ | PROT_EXEC)) { + perror("mprotect"); // FIXME: dlerror? 
+ return false; + } + } + + u8* load_addr = m_text_region->load_address().as_ptr(); + InitFunc init_function = (InitFunc)(load_addr + m_init_offset); + +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Calling DT_INIT at %p\n", init_function); +#endif + // FIXME: + // Disassembly of section .init: + // + // 00007e98 <_init>: + // 7e98: 55 push ebp + // + // Where da ret at? related to -nostartfiles for sure... + //(init_function)(); + + InitFunc* init_begin = (InitFunc*)(load_addr + m_init_array_offset); + u32 init_end = (u32)((u8*)init_begin + m_init_array_size); + while ((u32)init_begin < init_end) { + // Andriod sources claim that these can be -1, to be ignored. + // 0 definitely shows up. Apparently 0/-1 are valid? Confusing. + if (!*init_begin || ((i32)*init_begin == -1)) + continue; +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Calling DT_INITARRAY entry at %p\n", *init_begin); +#endif + (*init_begin)(); + ++init_begin; + } + +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Loaded %s\n", m_filename.characters()); +#endif + // FIXME: return false sometimes? missing symbol etc + return true; +} + +void* ELFDynamicObject::symbol_for_name(const char* name) +{ + // FIXME: If we enable gnu hash in the compiler, we should use that here instead + // The algo is way better with less collisions + uint32_t hash_value = calculate_elf_hash(name); + + u8* load_addr = m_text_region->load_address().as_ptr(); + + // NOTE: We need to use the loaded hash/string/symbol tables here to get the right + // addresses. The ones that are in the ELFImage won't cut it, they aren't relocated + u32* hash_table_begin = (u32*)(load_addr + m_hash_table_offset); + Elf32_Sym* symtab = (Elf32_Sym*)(load_addr + m_symbol_table_offset); + const char* strtab = (const char*)load_addr + m_string_table_offset; + + size_t num_buckets = hash_table_begin[0]; + + // This is here for completeness, but, since we're using the fact that every chain + // will end at chain 0 (which means 'not found'), we don't need to check num_chains. 
+ // Interestingly, num_chains is required to be num_symbols + //size_t num_chains = hash_table_begin[1]; + + u32* buckets = &hash_table_begin[2]; + u32* chains = &buckets[num_buckets]; + + for (u32 i = buckets[hash_value % num_buckets]; i; i = chains[i]) { + if (strcmp(name, strtab + symtab[i].st_name) == 0) { + void* retval = load_addr + symtab[i].st_value; +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Returning dynamic symbol with index %d for %s: %p\n", i, strtab + symtab[i].st_name, retval); +#endif + return retval; + } + } + + return nullptr; +} + +// offset is from PLT entry +// Tag is inserted into GOT #2 for 'this' DSO (literally the this pointer) +void ELFDynamicObject::patch_plt_entry(u32 got_offset, void* dso_got_tag) +{ + // FIXME: This is never called :( + CRASH(); + dbgprintf("------ PATCHING PLT ENTRY -------"); + // NOTE: We put 'this' into the GOT when we loaded it into memory + auto* dynamic_object_object = reinterpret_cast(dso_got_tag); + + // FIXME: might actually be a RelA, check m_plt_relocation_type + // u32 base_addr_offset = dynamic_object_object->m_relocation_table_offset + got_offset; + // Elf32_Rel relocation = *reinterpret_cast(&((u8*)dynamic_object_object->m_file_mapping)[base_addr_offset]); + u32 relocation_index = got_offset / dynamic_object_object->m_size_of_relocation_entry; + auto relocation = dynamic_object_object->m_image->dynamic_relocation_section().relocation(relocation_index); + + ASSERT(relocation.type() == R_386_JMP_SLOT); + + auto sym = relocation.symbol(); + + auto* text_load_address = dynamic_object_object->m_text_region->load_address().as_ptr(); + u8* relocation_address = text_load_address + relocation.offset(); + + if (0 > mprotect(text_load_address, dynamic_object_object->m_text_region->required_load_size(), PROT_READ | PROT_WRITE)) { + ASSERT_NOT_REACHED(); // uh oh, no can do boss + } + + dbgprintf("Found relocation address: %p for %s", relocation_address, sym.name()); + + *(u32*)relocation_address = 
(u32)(text_load_address + sym.value()); + + if (0 > mprotect(text_load_address, dynamic_object_object->m_text_region->required_load_size(), PROT_READ | PROT_EXEC)) { + ASSERT_NOT_REACHED(); // uh oh, no can do boss + } + + CRASH(); + // FIXME: Call the relocated method here? +} + +void ELFDynamicObject::do_relocations() +{ + auto dyn_relocation_section = m_image->dynamic_relocation_section(); + if (StringView(".rel.dyn") != dyn_relocation_section.name() || SHT_REL != dyn_relocation_section.type()) { + ASSERT_NOT_REACHED(); + } + + u8* load_base_address = m_text_region->base_address().as_ptr(); + + int i = -1; + + // FIXME: We should really bail on undefined symbols here. (but, there's some TLS vars that are currently undef soooo.... :) ) + + dyn_relocation_section.for_each_relocation([&](const ELFImage::DynamicRelocation& relocation) { + ++i; + VERBOSE("====== RELOCATION %d: offset 0x%08X, type %d, symidx %08X\n", i, relocation.offset(), relocation.type(), relocation.symbol_index()); + u32* patch_ptr = (u32*)(load_base_address + relocation.offset()); + switch (relocation.type()) { + case R_386_NONE: + // Apparently most loaders will just skip these? + // Seems if the 'link editor' generates one something is funky with your code + VERBOSE("None relocation. No symbol, no nothin.\n"); + break; + case R_386_32: { + auto symbol = relocation.symbol(); + + VERBOSE("Absolute relocation: name: '%s', value: %p\n", symbol.name(), symbol.value()); + if (symbol.bind() == STB_LOCAL) { + u32 symbol_address = symbol.section().address() + symbol.value(); + *patch_ptr += symbol_address; + } else if (symbol.bind() == STB_GLOBAL) { + u32 symbol_address = symbol.value() + (u32)load_base_address; + *patch_ptr += symbol_address; + } else if (symbol.bind() == STB_WEAK) { + // FIXME: Handle weak symbols... 
+ dbgprintf("ELFDynamicObject: Ignoring weak symbol %s\n", symbol.name()); + } else { + VERBOSE("Found new fun symbol bind value %d\n", symbol.bind()); + ASSERT_NOT_REACHED(); + } + VERBOSE(" Symbol address: %p\n", *patch_ptr); + break; + } + case R_386_PC32: { + auto symbol = relocation.symbol(); + VERBOSE("PC-relative relocation: '%s', value: %p\n", symbol.name(), symbol.value()); + u32 relative_offset = (symbol.value() - relocation.offset()); + *patch_ptr += relative_offset; + VERBOSE(" Symbol address: %p\n", *patch_ptr); + break; + } + case R_386_GLOB_DAT: { + auto symbol = relocation.symbol(); + VERBOSE("Global data relocation: '%s', value: %p\n", symbol.name(), symbol.value()); + u32 symbol_location = (u32)(m_data_region->base_address().as_ptr() + symbol.value()); + *patch_ptr = symbol_location; + VERBOSE(" Symbol address: %p\n", *patch_ptr); + break; + } + case R_386_RELATIVE: { + // FIXME: According to the spec, R_386_relative ones must be done first. + // We could explicitly do them first using m_number_of_relocatoins from DT_RELCOUNT + // However, our compiler is nice enough to put them at the front of the relocations for us :) + VERBOSE("Load address relocation at offset %X\n", relocation.offset()); + VERBOSE(" patch ptr == %p, adding load base address (%p) to it and storing %p\n", *patch_ptr, load_base_address, *patch_ptr + (u32)load_base_address); + *patch_ptr += (u32)load_base_address; // + addend for RelA (addend for Rel is stored at addr) + break; + } + case R_386_TLS_TPOFF: { + VERBOSE("Relocation type: R_386_TLS_TPOFF at offset %X\n", relocation.offset()); + // FIXME: this can't be right? I have no idea what "negative offset into TLS storage" means... + // FIXME: Check m_has_static_tls and do something different for dynamic TLS + VirtualAddress tls_region_loctation = m_tls_region->desired_load_address(); + *patch_ptr = relocation.offset() - (u32)tls_region_loctation.as_ptr() - *patch_ptr; + break; + } + default: + // Raise the alarm! 
Someone needs to implement this relocation type + dbgprintf("Found a new exciting relocation type %d\n", relocation.type()); + printf("ELFDynamicObject: Found unknown relocation type %d\n", relocation.type()); + ASSERT_NOT_REACHED(); + break; + } + return IterationDecision::Continue; + }); + + // FIXME: Or BIND_NOW flag passed in? + if (m_must_bind_now || s_always_bind_now) { + // FIXME: Why do we keep jumping to the entry in the GOT without going to our callback first? + // that would make this s_always_bind_now redundant + + for (size_t idx = 0; idx < m_size_of_plt_relocation_entry_list; idx += m_size_of_relocation_entry) { + VirtualAddress relocation_vaddr = m_text_region->load_address().offset(m_plt_relocation_offset_location).offset(idx); + Elf32_Rel* jump_slot_relocation = (Elf32_Rel*)relocation_vaddr.as_ptr(); + + ASSERT(ELF32_R_TYPE(jump_slot_relocation->r_info) == R_386_JMP_SLOT); + + auto sym = m_image->dynamic_symbol(ELF32_R_SYM(jump_slot_relocation->r_info)); + + auto* image_base_address = m_text_region->base_address().as_ptr(); + u8* relocation_address = image_base_address + jump_slot_relocation->r_offset; + u32 symbol_location = (u32)(image_base_address + sym.value()); + + VERBOSE("ELFDynamicObject: Jump slot relocation: putting %s (%p) into PLT at %p\n", sym.name(), symbol_location, relocation_address); + + *(u32*)relocation_address = symbol_location; + } + } +} + +u32 ELFDynamicObject::ProgramHeaderRegion::mmap_prot() const +{ + int prot = 0; + prot |= is_executable() ? PROT_EXEC : 0; + prot |= is_readable() ? PROT_READ : 0; + prot |= is_writable() ? 
PROT_WRITE : 0; + return prot; +} + +static const char* name_for_dtag(Elf32_Sword d_tag) +{ + switch (d_tag) { + case DT_NULL: + return "NULL"; /* marks end of _DYNAMIC array */ + case DT_NEEDED: + return "NEEDED"; /* string table offset of needed lib */ + case DT_PLTRELSZ: + return "PLTRELSZ"; /* size of relocation entries in PLT */ + case DT_PLTGOT: + return "PLTGOT"; /* address PLT/GOT */ + case DT_HASH: + return "HASH"; /* address of symbol hash table */ + case DT_STRTAB: + return "STRTAB"; /* address of string table */ + case DT_SYMTAB: + return "SYMTAB"; /* address of symbol table */ + case DT_RELA: + return "RELA"; /* address of relocation table */ + case DT_RELASZ: + return "RELASZ"; /* size of relocation table */ + case DT_RELAENT: + return "RELAENT"; /* size of relocation entry */ + case DT_STRSZ: + return "STRSZ"; /* size of string table */ + case DT_SYMENT: + return "SYMENT"; /* size of symbol table entry */ + case DT_INIT: + return "INIT"; /* address of initialization func. */ + case DT_FINI: + return "FINI"; /* address of termination function */ + case DT_SONAME: + return "SONAME"; /* string table offset of shared obj */ + case DT_RPATH: + return "RPATH"; /* string table offset of library search path */ + case DT_SYMBOLIC: + return "SYMBOLIC"; /* start sym search in shared obj. */ + case DT_REL: + return "REL"; /* address of rel. tbl. w addends */ + case DT_RELSZ: + return "RELSZ"; /* size of DT_REL relocation table */ + case DT_RELENT: + return "RELENT"; /* size of DT_REL relocation entry */ + case DT_PLTREL: + return "PLTREL"; /* PLT referenced relocation entry */ + case DT_DEBUG: + return "DEBUG"; /* bugger */ + case DT_TEXTREL: + return "TEXTREL"; /* Allow rel. mod. to unwritable seg */ + case DT_JMPREL: + return "JMPREL"; /* add. 
of PLT's relocation entries */ + case DT_BIND_NOW: + return "BIND_NOW"; /* Bind now regardless of env setting */ + case DT_INIT_ARRAY: + return "INIT_ARRAY"; /* address of array of init func */ + case DT_FINI_ARRAY: + return "FINI_ARRAY"; /* address of array of term func */ + case DT_INIT_ARRAYSZ: + return "INIT_ARRAYSZ"; /* size of array of init func */ + case DT_FINI_ARRAYSZ: + return "FINI_ARRAYSZ"; /* size of array of term func */ + case DT_RUNPATH: + return "RUNPATH"; /* strtab offset of lib search path */ + case DT_FLAGS: + return "FLAGS"; /* Set of DF_* flags */ + case DT_ENCODING: + return "ENCODING"; /* further DT_* follow encoding rules */ + case DT_PREINIT_ARRAY: + return "PREINIT_ARRAY"; /* address of array of preinit func */ + case DT_PREINIT_ARRAYSZ: + return "PREINIT_ARRAYSZ"; /* size of array of preinit func */ + case DT_LOOS: + return "LOOS"; /* reserved range for OS */ + case DT_HIOS: + return "HIOS"; /* specific dynamic array tags */ + case DT_LOPROC: + return "LOPROC"; /* reserved range for processor */ + case DT_HIPROC: + return "HIPROC"; /* specific dynamic array tags */ + case DT_GNU_HASH: + return "GNU_HASH"; /* address of GNU hash table */ + case DT_RELACOUNT: + return "RELACOUNT"; /* if present, number of RELATIVE */ + case DT_RELCOUNT: + return "RELCOUNT"; /* relocs, which must come first */ + case DT_FLAGS_1: + return "FLAGS_1"; + default: + return "??"; + } +} diff --git a/Libraries/LibELF/ELFDynamicObject.h b/Libraries/LibELF/ELFDynamicObject.h new file mode 100644 index 0000000000..98f09e9b1a --- /dev/null +++ b/Libraries/LibELF/ELFDynamicObject.h @@ -0,0 +1,122 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#define ALIGN_ROUND_UP(x, align) ((((size_t)(x)) + align - 1) & (~(align - 1))) + +class ELFDynamicObject : public RefCounted { +public: + static NonnullRefPtr construct(const char* filename, int fd, size_t file_size); + + ~ELFDynamicObject(); + + bool is_valid() const { return 
m_valid; } + + // FIXME: How can we resolve all of the symbols without having the original elf image for our process? + // RTLD_LAZY only at first probably... though variables ('objects') need resolved at load time every time + bool load(unsigned flags); + + // Intended for use by dlsym or other internal methods + void* symbol_for_name(const char*); + + void dump(); + +private: + class ProgramHeaderRegion { + public: + ProgramHeaderRegion(const Elf32_Phdr& header) + : m_program_header(header) + { + } + + VirtualAddress load_address() const { return m_load_address; } + VirtualAddress base_address() const { return m_image_base_address; } + + void set_load_address(VirtualAddress addr) { m_load_address = addr; } + void set_base_address(VirtualAddress addr) { m_image_base_address = addr; } + + // Information from ELF Program header + u32 type() const { return m_program_header.p_type; } + u32 flags() const { return m_program_header.p_flags; } + u32 offset() const { return m_program_header.p_offset; } + VirtualAddress desired_load_address() const { return VirtualAddress(m_program_header.p_vaddr); } + u32 size_in_memory() const { return m_program_header.p_memsz; } + u32 size_in_image() const { return m_program_header.p_filesz; } + u32 alignment() const { return m_program_header.p_align; } + u32 mmap_prot() const; + bool is_readable() const { return flags() & PF_R; } + bool is_writable() const { return flags() & PF_W; } + bool is_executable() const { return flags() & PF_X; } + bool is_tls_template() const { return type() == PT_TLS; } + bool is_load() const { return type() == PT_LOAD; } + bool is_dynamic() const { return type() == PT_DYNAMIC; } + + u32 required_load_size() { return ALIGN_ROUND_UP(m_program_header.p_memsz, m_program_header.p_align); } + + private: + Elf32_Phdr m_program_header; // Explictly a copy of the PHDR in the image + VirtualAddress m_load_address { 0 }; + VirtualAddress m_image_base_address { 0 }; + }; + + explicit ELFDynamicObject(const char* 
filename, int fd, size_t file_size); + + String m_filename; + size_t m_file_size { 0 }; + int m_image_fd { -1 }; + void* m_file_mapping { nullptr }; + bool m_valid { false }; + + OwnPtr m_image; + + void parse_dynamic_section(); + void do_relocations(); + + static void patch_plt_entry(u32 got_offset, void* dso_got_tag); + + Vector m_program_header_regions; + ProgramHeaderRegion* m_text_region { nullptr }; + ProgramHeaderRegion* m_data_region { nullptr }; + ProgramHeaderRegion* m_tls_region { nullptr }; + + // Begin Section information collected from DT_* entries + uintptr_t m_init_offset { 0 }; + uintptr_t m_fini_offset { 0 }; + + uintptr_t m_init_array_offset { 0 }; + size_t m_init_array_size { 0 }; + + uintptr_t m_hash_table_offset { 0 }; + + uintptr_t m_string_table_offset { 0 }; + uintptr_t m_symbol_table_offset { 0 }; + size_t m_size_of_string_table { 0 }; + size_t m_size_of_symbol_table_entry { 0 }; + + Elf32_Sword m_procedure_linkage_table_relocation_type { -1 }; + uintptr_t m_plt_relocation_offset_location { 0 }; // offset of PLT relocations, at end of relocations + size_t m_size_of_plt_relocation_entry_list { 0 }; + uintptr_t m_procedure_linkage_table_offset { 0 }; + + // NOTE: We'll only ever either RELA or REL entries, not both (thank god) + size_t m_number_of_relocations { 0 }; + size_t m_size_of_relocation_entry { 0 }; + size_t m_size_of_relocation_table { 0 }; + uintptr_t m_relocation_table_offset { 0 }; + + // DT_FLAGS + bool m_should_process_origin = false; + bool m_requires_symbolic_symbol_resolution = false; + // Text relocations meaning: we need to edit the .text section which is normally mapped PROT_READ + bool m_has_text_relocations = false; + bool m_must_bind_now = false; // FIXME: control with an environment var as well? 
+ bool m_has_static_thread_local_storage = false; + // End Section information from DT_* entries +}; diff --git a/Libraries/LibELF/ELFImage.cpp b/Libraries/LibELF/ELFImage.cpp index dfd9cb73b8..832711afbc 100644 --- a/Libraries/LibELF/ELFImage.cpp +++ b/Libraries/LibELF/ELFImage.cpp @@ -43,6 +43,11 @@ unsigned ELFImage::symbol_count() const return section(m_symbol_table_section_index).entry_count(); } +unsigned ELFImage::dynamic_symbol_count() const +{ + return section(m_dynamic_symbol_table_section_index).entry_count(); +} + void ELFImage::dump() const { dbgprintf("ELFImage{%p} {\n", this); @@ -110,8 +115,25 @@ bool ELFImage::parse() m_symbol_table_section_index = i; } if (sh.sh_type == SHT_STRTAB && i != header().e_shstrndx) { - ASSERT(!m_string_table_section_index || m_string_table_section_index == i); - m_string_table_section_index = i; + if (StringView(".strtab") == section_header_table_string(sh.sh_name)) + m_string_table_section_index = i; + else if (StringView(".dynstr") == section_header_table_string(sh.sh_name)) + m_dynamic_string_table_section_index = i; + else + ASSERT_NOT_REACHED(); + } + if (sh.sh_type == SHT_DYNAMIC) { + ASSERT(!m_dynamic_section_index || m_dynamic_section_index == i); + m_dynamic_section_index = i; + } + if (sh.sh_type == SHT_DYNSYM) { + ASSERT(!m_dynamic_symbol_table_section_index || m_dynamic_symbol_table_section_index == i); + m_dynamic_symbol_table_section_index = i; + } + if (sh.sh_type == SHT_REL) { + if (StringView(".rel.dyn") == section_header_table_string(sh.sh_name)) { + m_dynamic_relocation_section_index = i; + } } } @@ -140,6 +162,14 @@ const char* ELFImage::table_string(unsigned offset) const return raw_data(sh.sh_offset + offset); } +const char* ELFImage::dynamic_table_string(unsigned offset) const +{ + auto& sh = section_header(m_dynamic_string_table_section_index); + if (sh.sh_type != SHT_STRTAB) + return nullptr; + return raw_data(sh.sh_offset + offset); +} + const char* ELFImage::raw_data(unsigned offset) const { 
return reinterpret_cast(m_buffer) + offset; @@ -159,7 +189,7 @@ const Elf32_Phdr& ELFImage::program_header_internal(unsigned index) const const Elf32_Shdr& ELFImage::section_header(unsigned index) const { ASSERT(index < header().e_shnum); - return *reinterpret_cast(raw_data(header().e_shoff + (index * sizeof(Elf32_Shdr)))); + return *reinterpret_cast(raw_data(header().e_shoff + (index * header().e_shentsize))); } const ELFImage::Symbol ELFImage::symbol(unsigned index) const @@ -169,6 +199,13 @@ const ELFImage::Symbol ELFImage::symbol(unsigned index) const return Symbol(*this, index, raw_syms[index]); } +const ELFImage::DynamicSymbol ELFImage::dynamic_symbol(unsigned index) const +{ + ASSERT(index < symbol_count()); + auto* raw_syms = reinterpret_cast(raw_data(section(m_dynamic_symbol_table_section_index).offset())); + return DynamicSymbol(*this, index, raw_syms[index]); +} + const ELFImage::Section ELFImage::section(unsigned index) const { ASSERT(index < section_count()); @@ -188,6 +225,13 @@ const ELFImage::Relocation ELFImage::RelocationSection::relocation(unsigned inde return Relocation(m_image, rels[index]); } +const ELFImage::DynamicRelocation ELFImage::DynamicRelocationSection::relocation(unsigned index) const +{ + ASSERT(index < relocation_count()); + auto* rels = reinterpret_cast(m_image.raw_data(offset())); + return DynamicRelocation(m_image, rels[index]); +} + const ELFImage::RelocationSection ELFImage::Section::relocations() const { // FIXME: This is ugly. 
@@ -213,3 +257,15 @@ const ELFImage::Section ELFImage::lookup_section(const char* name) const return section((*it).value); return section(0); } + +const ELFImage::DynamicSection ELFImage::dynamic_section() const +{ /* Returns the section whose index parse() stored for SHT_DYNAMIC. */ + ASSERT(is_dynamic()); /* callers must only ask for this on ET_DYN images */ + return section(m_dynamic_section_index); +} + +const ELFImage::DynamicRelocationSection ELFImage::dynamic_relocation_section() const +{ /* Returns the section whose index parse() stored for .rel.dyn. */ + ASSERT(is_dynamic()); + return section(m_dynamic_relocation_section_index); +} diff --git a/Libraries/LibELF/ELFImage.h b/Libraries/LibELF/ELFImage.h index 5ca519e2c5..e30e2a71e0 100644 --- a/Libraries/LibELF/ELFImage.h +++ b/Libraries/LibELF/ELFImage.h @@ -16,8 +16,13 @@ public: class Section; class RelocationSection; + class DynamicRelocationSection; class Symbol; + class DynamicSymbol; class Relocation; + class DynamicRelocation; + class DynamicSection; + class DynamicSectionEntry; class Symbol { public: @@ -45,6 +50,32 @@ public: const unsigned m_index; }; + class DynamicSymbol { /* Read-only view of one Elf32_Sym; it stores references to the image and the raw symbol rather than copies. */ + public: + DynamicSymbol(const ELFImage& image, unsigned index, const Elf32_Sym& sym) + : m_image(image) + , m_sym(sym) + , m_index(index) + { + } + + ~DynamicSymbol() {} + + const char* name() const { return m_image.dynamic_table_string(m_sym.st_name); } /* looked up via dynamic_table_string(), not table_string() */ + unsigned section_index() const { return m_sym.st_shndx; } + unsigned value() const { return m_sym.st_value; } + unsigned size() const { return m_sym.st_size; } + unsigned index() const { return m_index; } /* the index this view was constructed with */ + unsigned type() const { return ELF32_ST_TYPE(m_sym.st_info); } + unsigned bind() const { return ELF32_ST_BIND(m_sym.st_info); } + const Section section() const { return m_image.section(section_index()); } + + private: + const ELFImage& m_image; + const Elf32_Sym& m_sym; + const unsigned m_index; + }; + class ProgramHeader { public: ProgramHeader(const ELFImage& image, unsigned program_header_index) @@ -67,6 +98,7 @@ public: bool is_writable() const { return flags() & PF_W; } bool is_executable() const { return flags() & PF_X; } const char* raw_data() const 
{ return m_image.raw_data(m_program_header.p_offset); } + Elf32_Phdr raw_header() const { return m_program_header; } /* returns the program header by value */ private: const ELFImage& m_image; @@ -100,6 +132,8 @@ public: protected: friend class RelocationSection; + friend class DynamicSection; /* the Dynamic* wrappers read m_image and m_section_index of the Section they are built from */ + friend class DynamicRelocationSection; const ELFImage& m_image; const Elf32_Shdr& m_section_header; unsigned m_section_index; @@ -117,6 +151,38 @@ public: void for_each_relocation(F) const; }; + class DynamicRelocationSection : public Section { /* Section wrapper that hands out its entries as DynamicRelocation objects. */ + public: + DynamicRelocationSection(const Section& section) + : Section(section.m_image, section.m_section_index) + { + } + unsigned relocation_count() const { return entry_count(); } + const DynamicRelocation relocation(unsigned index) const; + template + void for_each_relocation(F) const; + }; + + class DynamicRelocation { /* Read-only view of one Elf32_Rel; it stores references to the image and the raw entry. */ + public: + DynamicRelocation(const ELFImage& image, const Elf32_Rel& rel) + : m_image(image) + , m_rel(rel) + { + } + + ~DynamicRelocation() {} + + unsigned offset() const { return m_rel.r_offset; } + unsigned type() const { return ELF32_R_TYPE(m_rel.r_info); } + unsigned symbol_index() const { return ELF32_R_SYM(m_rel.r_info); } + const DynamicSymbol symbol() const { return m_image.dynamic_symbol(symbol_index()); } /* resolved against the dynamic symbol table */ + + private: + const ELFImage& m_image; + const Elf32_Rel& m_rel; + }; + class Relocation { public: Relocation(const ELFImage& image, const Elf32_Rel& rel) @@ -137,13 +203,48 @@ public: const Elf32_Rel& m_rel; }; + class DynamicSection : public Section { /* Section wrapper for iterating the entries of an SHT_DYNAMIC section. */ + public: + DynamicSection(const Section& section) + : Section(section.m_image, section.m_section_index) + { + ASSERT(type() == SHT_DYNAMIC); /* wrapping any other section type is a programming error */ + } + + template + void for_each_dynamic_entry(F) const; + }; + + class DynamicSectionEntry { /* Read-only view of one Elf32_Dyn entry. */ + public: + DynamicSectionEntry(const ELFImage& image, const Elf32_Dyn& dyn) + : m_image(image) + , m_dyn(dyn) + { + } + + ~DynamicSectionEntry() {} + + Elf32_Sword tag() const { return m_dyn.d_tag; } + Elf32_Addr ptr() const { return m_dyn.d_un.d_ptr; } + 
Elf32_Word val() const { return m_dyn.d_un.d_val; } + + private: + const ELFImage& m_image; + const Elf32_Dyn& m_dyn; + }; + unsigned symbol_count() const; + unsigned dynamic_symbol_count() const; /* entry count of the recorded dynamic symbol table section */ unsigned section_count() const; unsigned program_header_count() const; const Symbol symbol(unsigned) const; + const DynamicSymbol dynamic_symbol(unsigned) const; /* entry of the dynamic symbol table at the given index */ const Section section(unsigned) const; const ProgramHeader program_header(unsigned const) const; + const DynamicSection dynamic_section() const; /* asserts is_dynamic() */ + const DynamicRelocationSection dynamic_relocation_section() const; /* asserts is_dynamic() */ template void for_each_section(F) const; @@ -152,6 +253,8 @@ public: template void for_each_symbol(F) const; template + void for_each_dynamic_symbol(F) const; + template void for_each_program_header(F) const; // NOTE: Returns section(0) if section with name is not found. @@ -160,6 +263,7 @@ public: bool is_executable() const { return header().e_type == ET_EXEC; } bool is_relocatable() const { return header().e_type == ET_REL; } + bool is_dynamic() const { return header().e_type == ET_DYN; } /* true when the ELF header's e_type is ET_DYN */ VirtualAddress entry() const { return VirtualAddress(header().e_entry); } @@ -172,12 +276,17 @@ private: const char* table_string(unsigned offset) const; const char* section_header_table_string(unsigned offset) const; const char* section_index_to_string(unsigned index) const; + const char* dynamic_table_string(unsigned offset) const; /* may return nullptr when the recorded section is not SHT_STRTAB */ const u8* m_buffer { nullptr }; HashMap m_sections; bool m_valid { false }; unsigned m_symbol_table_section_index { 0 }; unsigned m_string_table_section_index { 0 }; + unsigned m_dynamic_symbol_table_section_index { 0 }; // .dynsym + unsigned m_dynamic_string_table_section_index { 0 }; // .dynstr + unsigned m_dynamic_section_index { 0 }; // .dynamic + unsigned m_dynamic_relocation_section_index { 0 }; // .rel.dyn }; template @@ -208,6 +317,15 @@ inline void ELFImage::RelocationSection::for_each_relocation(F func) const } } +template +inline void 
ELFImage::DynamicRelocationSection::for_each_relocation(F func) const +{ /* Calls func on each relocation in order; stops early when func returns IterationDecision::Break. */ + for (unsigned i = 0; i < relocation_count(); ++i) { + if (func(relocation(i)) == IterationDecision::Break) + break; + } +} + template inline void ELFImage::for_each_symbol(F func) const { @@ -217,9 +335,31 @@ inline void ELFImage::for_each_symbol(F func) const } } +template +inline void ELFImage::for_each_dynamic_symbol(F func) const +{ /* Calls func on each dynamic symbol in order; stops early when func returns IterationDecision::Break. */ + for (unsigned i = 0; i < dynamic_symbol_count(); ++i) { + if (func(dynamic_symbol(i)) == IterationDecision::Break) /* use the dynamic symbol table to match the loop bound; symbol(i) reads the regular symbol table */ + break; + } +} + template inline void ELFImage::for_each_program_header(F func) const { for (unsigned i = 0; i < program_header_count(); ++i) func(program_header(i)); } + +template +inline void ELFImage::DynamicSection::for_each_dynamic_entry(F func) const +{ /* Calls func on each entry up to, but not including, the first DT_NULL entry. */ + auto* dyns = reinterpret_cast(m_image.raw_data(offset())); + for (unsigned i = 0;; ++i) { /* NOTE(review): i has no upper bound; termination relies on a DT_NULL entry being present */ + auto&& dyn = DynamicSectionEntry(m_image, dyns[i]); + if (dyn.tag() == DT_NULL) + break; + if (func(dyn) == IterationDecision::Break) + break; + } +} diff --git a/Libraries/LibELF/exec_elf.h b/Libraries/LibELF/exec_elf.h index 2be5076801..9d39ef4005 100644 --- a/Libraries/LibELF/exec_elf.h +++ b/Libraries/LibELF/exec_elf.h @@ -775,7 +775,17 @@ struct elf_args { #define ELF_TARG_VER 1 /* The ver for which this code is intended */ -#define R_386_32 1 -#define R_386_PC32 2 +/* Relocation types */ +#define R_386_NONE 0 +#define R_386_32 1 /* Symbol + Addend */ +#define R_386_PC32 2 /* Symbol + Addend - Section offset */ +#define R_386_GOT32 3 /* Used by build-time linker to create GOT entry */ +#define R_386_PLT32 4 /* Used by build-time linker to create PLT entry */ +#define R_386_COPY 5 /* https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter4-10454.html#chapter4-84604 */ +#define R_386_GLOB_DAT 6 /* Relation b/w GOT entry and symbol */ +#define R_386_JMP_SLOT 7 /* Fixed up by dynamic loader */ +#define R_386_RELATIVE 8 /* Base address + Addend */ +#define R_386_TLS_TPOFF 14 /* Negative offset into 
the static TLS storage */ + #endif /* _SYS_EXEC_ELF_H_ */