1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-16 16:34:59 +00:00
serenity/Kernel/Syscalls/execve.cpp
Tom c4176b0da1 Kernel: Fix Lock race causing infinite spinning between two threads
We need to account for how many shared lock instances the current
thread owns, so that we can properly release such references when
yielding execution.

We also need to release the process lock when donating.
2020-12-16 23:38:17 +01:00

653 lines
25 KiB
C++

/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/LexicalPath.h>
#include <AK/ScopeGuard.h>
#include <AK/TemporaryChange.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/Process.h>
#include <Kernel/Profiling.h>
#include <Kernel/Random.h>
#include <Kernel/Time/TimeManagement.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>
#include <Kernel/VM/Region.h>
#include <Kernel/VM/SharedInodeVMObject.h>
#include <LibC/limits.h>
#include <LibELF/Loader.h>
#include <LibELF/Validation.h>
//#define EXEC_DEBUG
//#define MM_DEBUG
namespace Kernel {
static bool validate_stack_size(const Vector<String>& arguments, const Vector<String>& environment)
{
size_t total_blob_size = 0;
for (auto& a : arguments)
total_blob_size += a.length() + 1;
for (auto& e : environment)
total_blob_size += e.length() + 1;
size_t total_meta_size = sizeof(char*) * (arguments.size() + 1) + sizeof(char*) * (environment.size() + 1);
// FIXME: This doesn't account for the size of the auxiliary vector
return (total_blob_size + total_meta_size) < Thread::default_userspace_stack_size;
}
KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls should_allocate_tls)
{
auto& inode = *(object_description.inode());
auto vmobject = SharedInodeVMObject::create_with_inode(inode);
if (static_cast<const SharedInodeVMObject&>(*vmobject).writable_mappings()) {
dbg() << "Refusing to execute a write-mapped program";
return KResult(-ETXTBSY);
}
InodeMetadata loader_metadata = object_description.metadata();
auto region = MM.allocate_kernel_region_with_vmobject(*vmobject, PAGE_ROUND_UP(loader_metadata.size), "ELF loading", Region::Access::Read);
if (!region) {
dbg() << "Could not allocate memory for ELF loading";
return KResult(-ENOMEM);
}
Region* master_tls_region { nullptr };
size_t master_tls_size = 0;
size_t master_tls_alignment = 0;
m_entry_eip = 0;
FlatPtr load_base_address = 0;
MM.enter_process_paging_scope(*this);
String object_name = LexicalPath(object_description.absolute_path()).basename();
RefPtr<ELF::Loader> loader = ELF::Loader::create(region->vaddr().as_ptr(), loader_metadata.size, move(object_name));
loader->map_section_hook = [&](VirtualAddress vaddr, size_t size, size_t alignment, size_t offset_in_image, bool is_readable, bool is_writable, bool is_executable, const String& name) -> u8* {
ASSERT(size);
ASSERT(alignment == PAGE_SIZE);
int prot = 0;
if (is_readable)
prot |= PROT_READ;
if (is_writable)
prot |= PROT_WRITE;
if (is_executable)
prot |= PROT_EXEC;
if (auto* region = allocate_region_with_vmobject(vaddr.offset(load_offset), size, *vmobject, offset_in_image, String(name), prot)) {
region->set_shared(true);
if (offset_in_image == 0)
load_base_address = (FlatPtr)region->vaddr().as_ptr();
return region->vaddr().as_ptr();
}
return nullptr;
};
loader->alloc_section_hook = [&](VirtualAddress vaddr, size_t size, size_t alignment, bool is_readable, bool is_writable, const String& name) -> u8* {
ASSERT(size);
ASSERT(alignment == PAGE_SIZE);
int prot = 0;
if (is_readable)
prot |= PROT_READ;
if (is_writable)
prot |= PROT_WRITE;
if (auto* region = allocate_region(vaddr.offset(load_offset), size, String(name), prot))
return region->vaddr().as_ptr();
return nullptr;
};
if (should_allocate_tls == ShouldAllocateTls::Yes) {
loader->tls_section_hook = [&](size_t size, size_t alignment) {
ASSERT(size);
master_tls_region = allocate_region({}, size, String(), PROT_READ | PROT_WRITE);
master_tls_size = size;
master_tls_alignment = alignment;
return master_tls_region->vaddr().as_ptr();
};
}
ASSERT(!Processor::current().in_critical());
bool success = loader->load();
if (!success) {
klog() << "do_exec: Failure loading program";
return KResult(-ENOEXEC);
}
if (!loader->entry().offset(load_offset).get()) {
klog() << "do_exec: Failure loading program, entry pointer is invalid! (" << loader->entry().offset(load_offset) << ")";
return KResult(-ENOEXEC);
}
// NOTE: At this point, we've committed to the new executable.
return LoadResult {
load_base_address,
loader->entry().offset(load_offset).get(),
(size_t)loader_metadata.size,
VirtualAddress(loader->image().program_header_table_offset()).offset(load_offset).get(),
loader->image().program_header_count(),
master_tls_region ? master_tls_region->make_weak_ptr() : nullptr,
master_tls_size,
master_tls_alignment
};
}
int Process::load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description)
{
RefPtr<PageDirectory> old_page_directory;
NonnullOwnPtrVector<Region> old_regions;
{
// Need to make sure we don't swap contexts in the middle
ScopedCritical critical;
old_page_directory = move(m_page_directory);
old_regions = move(m_regions);
m_page_directory = PageDirectory::create_for_userspace(*this);
}
ArmedScopeGuard rollback_regions_guard([&]() {
ASSERT(Process::current() == this);
// Need to make sure we don't swap contexts in the middle
ScopedCritical critical;
m_page_directory = move(old_page_directory);
m_regions = move(old_regions);
MM.enter_process_paging_scope(*this);
});
if (!interpreter_description) {
auto result = load_elf_object(main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes);
if (result.is_error())
return result.error();
m_load_base = result.value().load_base;
m_entry_eip = result.value().entry_eip;
m_master_tls_region = result.value().tls_region;
m_master_tls_size = result.value().tls_size;
m_master_tls_alignment = result.value().tls_alignment;
rollback_regions_guard.disarm();
return 0;
}
// TODO: This should be randomized for ASLR
constexpr FlatPtr interpreter_load_offset = 0x08000000;
auto interpreter_load_result = load_elf_object(*interpreter_description, interpreter_load_offset, ShouldAllocateTls::No);
if (interpreter_load_result.is_error())
return interpreter_load_result.error();
m_load_base = interpreter_load_result.value().load_base;
m_entry_eip = interpreter_load_result.value().entry_eip;
// TLS allocation will be done in userspace by the loader
m_master_tls_region = nullptr;
m_master_tls_size = 0;
m_master_tls_alignment = 0;
rollback_regions_guard.disarm();
return 0;
}
int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Vector<String> arguments, Vector<String> environment, RefPtr<FileDescription> interpreter_description, Thread*& new_main_thread, u32& prev_flags)
{
ASSERT(is_user_process());
ASSERT(!Processor::current().in_critical());
auto path = main_program_description->absolute_path();
#ifdef EXEC_DEBUG
dbg() << "do_exec(" << path << ")";
#endif
// FIXME: How much stack space does process startup need?
if (!validate_stack_size(arguments, environment))
return -E2BIG;
auto parts = path.split('/');
if (parts.is_empty())
return -ENOENT;
// Disable profiling temporarily in case it's running on this process.
bool was_profiling = is_profiling();
TemporaryChange profiling_disabler(m_profiling, false);
// Mark this thread as the current thread that does exec
// No other thread from this process will be scheduled to run
auto current_thread = Thread::current();
m_exec_tid = current_thread->tid();
#ifdef MM_DEBUG
dbg() << "Process " << pid().value() << " exec: PD=" << m_page_directory.ptr() << " created";
#endif
int load_rc = load(main_program_description, interpreter_description);
if (load_rc) {
klog() << "do_exec: Failed to load main program or interpreter";
return load_rc;
}
kill_threads_except_self();
#ifdef EXEC_DEBUG
klog() << "Memory layout after ELF load:";
dump_regions();
#endif
m_executable = main_program_description->custody();
m_promises = m_execpromises;
m_veil_state = VeilState::None;
m_unveiled_paths.clear();
auto main_program_metadata = main_program_description->metadata();
if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
if (main_program_metadata.is_setuid())
m_euid = m_suid = main_program_metadata.uid;
if (main_program_metadata.is_setgid())
m_egid = m_sgid = main_program_metadata.gid;
}
current_thread->set_default_signal_dispositions();
current_thread->clear_signals();
m_futex_queues.clear();
m_region_lookup_cache = {};
disown_all_shared_buffers();
for (size_t i = 0; i < m_fds.size(); ++i) {
auto& description_and_flags = m_fds[i];
if (description_and_flags.description() && description_and_flags.flags() & FD_CLOEXEC)
description_and_flags = {};
}
if (interpreter_description) {
m_main_program_fd = alloc_fd();
ASSERT(m_main_program_fd >= 0);
main_program_description->seek(0, SEEK_SET);
main_program_description->set_readable(true);
m_fds[m_main_program_fd].set(move(main_program_description), FD_CLOEXEC);
}
new_main_thread = nullptr;
if (&current_thread->process() == this) {
new_main_thread = current_thread;
} else {
for_each_thread([&](auto& thread) {
new_main_thread = &thread;
return IterationDecision::Break;
});
}
ASSERT(new_main_thread);
auto auxv = generate_auxiliary_vector();
// NOTE: We create the new stack before disabling interrupts since it will zero-fault
// and we don't want to deal with faults after this point.
auto make_stack_result = new_main_thread->make_userspace_stack_for_main_thread(move(arguments), move(environment), move(auxv));
if (make_stack_result.is_error())
return make_stack_result.error();
u32 new_userspace_esp = make_stack_result.value();
if (wait_for_tracer_at_next_execve())
Thread::current()->send_urgent_signal_to_self(SIGSTOP);
// We enter a critical section here because we don't want to get interrupted between do_exec()
// and Processor::assume_context() or the next context switch.
// If we used an InterruptDisabler that sti()'d on exit, we might timer tick'd too soon in exec().
Processor::current().enter_critical(prev_flags);
// NOTE: Be careful to not trigger any page faults below!
m_name = parts.take_last();
new_main_thread->set_name(m_name);
// FIXME: PID/TID ISSUE
m_pid = new_main_thread->tid().value();
auto tsr_result = new_main_thread->make_thread_specific_region({});
if (tsr_result.is_error())
return tsr_result.error();
new_main_thread->reset_fpu_state();
auto& tss = new_main_thread->m_tss;
tss.cs = GDT_SELECTOR_CODE3 | 3;
tss.ds = GDT_SELECTOR_DATA3 | 3;
tss.es = GDT_SELECTOR_DATA3 | 3;
tss.ss = GDT_SELECTOR_DATA3 | 3;
tss.fs = GDT_SELECTOR_DATA3 | 3;
tss.gs = GDT_SELECTOR_TLS | 3;
tss.eip = m_entry_eip;
tss.esp = new_userspace_esp;
tss.cr3 = m_page_directory->cr3();
tss.ss2 = m_pid.value();
if (was_profiling)
Profiling::did_exec(path);
{
ScopedSpinLock lock(g_scheduler_lock);
new_main_thread->set_state(Thread::State::Runnable);
}
u32 lock_count_to_restore;
(void)big_lock().force_unlock_if_locked(lock_count_to_restore);
ASSERT_INTERRUPTS_DISABLED();
ASSERT(Processor::current().in_critical());
return 0;
}
Vector<AuxiliaryValue> Process::generate_auxiliary_vector() const
{
Vector<AuxiliaryValue> auxv;
// PHDR/EXECFD
// PH*
auxv.append({ AuxiliaryValue::PageSize, PAGE_SIZE });
auxv.append({ AuxiliaryValue::BaseAddress, (void*)m_load_base });
auxv.append({ AuxiliaryValue::Entry, (void*)m_entry_eip });
// NOTELF
auxv.append({ AuxiliaryValue::Uid, (long)m_uid });
auxv.append({ AuxiliaryValue::EUid, (long)m_euid });
auxv.append({ AuxiliaryValue::Gid, (long)m_gid });
auxv.append({ AuxiliaryValue::EGid, (long)m_egid });
// FIXME: Don't hard code this? We might support other platforms later.. (e.g. x86_64)
auxv.append({ AuxiliaryValue::Platform, "i386" });
// FIXME: This is platform specific
auxv.append({ AuxiliaryValue::HwCap, (long)CPUID(1).edx() });
auxv.append({ AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() });
// FIXME: Also take into account things like extended filesystem permissions? That's what linux does...
auxv.append({ AuxiliaryValue::Secure, ((m_uid != m_euid) || (m_gid != m_egid)) ? 1 : 0 });
char random_bytes[16] {};
get_fast_random_bytes((u8*)random_bytes, sizeof(random_bytes));
auxv.append({ AuxiliaryValue::Random, String(random_bytes, sizeof(random_bytes)) });
auxv.append({ AuxiliaryValue::ExecFilename, m_executable->absolute_path() });
auxv.append({ AuxiliaryValue::ExecFileDescriptor, m_main_program_fd });
auxv.append({ AuxiliaryValue::Null, 0L });
return auxv;
}
static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread)
{
int word_start = 2;
int word_length = 0;
if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
Vector<String> interpreter_words;
for (int i = 2; i < nread; ++i) {
if (first_page[i] == '\n') {
break;
}
if (first_page[i] != ' ') {
++word_length;
}
if (first_page[i] == ' ') {
if (word_length > 0) {
interpreter_words.append(String(&first_page[word_start], word_length));
}
word_length = 0;
word_start = i + 1;
}
}
if (word_length > 0)
interpreter_words.append(String(&first_page[word_start], word_length));
if (!interpreter_words.is_empty())
return interpreter_words;
}
return KResult(-ENOEXEC);
}
KResultOr<NonnullRefPtr<FileDescription>> Process::find_elf_interpreter_for_executable(const String& path, char (&first_page)[PAGE_SIZE], int nread, size_t file_size)
{
if (nread < (int)sizeof(Elf32_Ehdr))
return KResult(-ENOEXEC);
auto elf_header = (Elf32_Ehdr*)first_page;
if (!ELF::validate_elf_header(*elf_header, file_size)) {
dbg() << "exec(" << path << "): File has invalid ELF header";
return KResult(-ENOEXEC);
}
// Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
String interpreter_path;
if (!ELF::validate_program_headers(*elf_header, file_size, (u8*)first_page, nread, &interpreter_path)) {
dbg() << "exec(" << path << "): File has invalid ELF Program headers";
return KResult(-ENOEXEC);
}
if (!interpreter_path.is_empty()) {
// Programs with an interpreter better be relocatable executables or we don't know what to do...
if (elf_header->e_type != ET_DYN)
return KResult(-ENOEXEC);
dbg() << "exec(" << path << "): Using program interpreter " << interpreter_path;
auto interp_result = VFS::the().open(interpreter_path, O_EXEC, 0, current_directory());
if (interp_result.is_error()) {
dbg() << "exec(" << path << "): Unable to open program interpreter " << interpreter_path;
return interp_result.error();
}
auto interpreter_description = interp_result.value();
auto interp_metadata = interpreter_description->metadata();
ASSERT(interpreter_description->inode());
// Validate the program interpreter as a valid elf binary.
// If your program interpreter is a #! file or something, it's time to stop playing games :)
if (interp_metadata.size < (int)sizeof(Elf32_Ehdr))
return KResult(-ENOEXEC);
memset(first_page, 0, sizeof(first_page));
auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
auto nread_or_error = interpreter_description->read(first_page_buffer, sizeof(first_page));
if (nread_or_error.is_error())
return KResult(-ENOEXEC);
nread = nread_or_error.value();
if (nread < (int)sizeof(Elf32_Ehdr))
return KResult(-ENOEXEC);
elf_header = (Elf32_Ehdr*)first_page;
if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) {
dbg() << "exec(" << path << "): Interpreter (" << interpreter_description->absolute_path() << ") has invalid ELF header";
return KResult(-ENOEXEC);
}
// Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
String interpreter_interpreter_path;
if (!ELF::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, &interpreter_interpreter_path)) {
dbg() << "exec(" << path << "): Interpreter (" << interpreter_description->absolute_path() << ") has invalid ELF Program headers";
return KResult(-ENOEXEC);
}
// FIXME: Uncomment this
// How do we get gcc to not insert an interpreter section to /usr/lib/Loader.so itself?
// if (!interpreter_interpreter_path.is_empty()) {
// dbg() << "exec(" << path << "): Interpreter (" << interpreter_description->absolute_path() << ") has its own interpreter (" << interpreter_interpreter_path << ")! No thank you!";
// return KResult(-ELOOP);
// }
return interpreter_description;
}
if (elf_header->e_type != ET_EXEC) {
// We can't exec an ET_REL, that's just an object file from the compiler
// If it's ET_DYN with no PT_INTERP, then we can't load it properly either
return KResult(-ENOEXEC);
}
// No interpreter, but, path refers to a valid elf image
return KResult(KSuccess);
}
int Process::exec(String path, Vector<String> arguments, Vector<String> environment, int recursion_depth)
{
if (recursion_depth > 2) {
dbg() << "exec(" << path << "): SHENANIGANS! recursed too far trying to find #! interpreter";
return -ELOOP;
}
// Open the file to check what kind of binary format it is
// Currently supported formats:
// - #! interpreted file
// - ELF32
// * ET_EXEC binary that just gets loaded
// * ET_DYN binary that requires a program interpreter
//
auto result = VFS::the().open(path, O_EXEC, 0, current_directory());
if (result.is_error())
return result.error();
auto description = result.release_value();
auto metadata = description->metadata();
// Always gonna need at least 3 bytes. these are for #!X
if (metadata.size < 3)
return -ENOEXEC;
ASSERT(description->inode());
// Read the first page of the program into memory so we can validate the binfmt of it
char first_page[PAGE_SIZE];
auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
auto nread_or_error = description->read(first_page_buffer, sizeof(first_page));
if (nread_or_error.is_error())
return -ENOEXEC;
// 1) #! interpreted file
auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread_or_error.value());
if (!shebang_result.is_error()) {
Vector<String> new_arguments(shebang_result.value());
new_arguments.append(path);
arguments.remove(0);
new_arguments.append(move(arguments));
return exec(shebang_result.value().first(), move(new_arguments), move(environment), ++recursion_depth);
}
// #2) ELF32 for i386
auto elf_result = find_elf_interpreter_for_executable(path, first_page, nread_or_error.value(), metadata.size);
RefPtr<FileDescription> interpreter_description;
// We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out)
if (!elf_result.is_error())
interpreter_description = elf_result.value();
else if (elf_result.error().is_error())
return elf_result.error();
// The bulk of exec() is done by do_exec(), which ensures that all locals
// are cleaned up by the time we yield-teleport below.
Thread* new_main_thread = nullptr;
u32 prev_flags = 0;
int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags);
m_exec_tid = 0;
if (rc < 0)
return rc;
ASSERT_INTERRUPTS_DISABLED();
ASSERT(Processor::current().in_critical());
auto current_thread = Thread::current();
if (current_thread == new_main_thread) {
// We need to enter the scheduler lock before changing the state
// and it will be released after the context switch into that
// thread. We should also still be in our critical section
ASSERT(!g_scheduler_lock.own_lock());
ASSERT(Processor::current().in_critical() == 1);
g_scheduler_lock.lock();
current_thread->set_state(Thread::State::Running);
Processor::assume_context(*current_thread, prev_flags);
ASSERT_NOT_REACHED();
}
Processor::current().leave_critical(prev_flags);
return 0;
}
int Process::sys$execve(Userspace<const Syscall::SC_execve_params*> user_params)
{
REQUIRE_PROMISE(exec);
// NOTE: Be extremely careful with allocating any kernel memory in exec().
// On success, the kernel stack will be lost.
Syscall::SC_execve_params params;
if (!copy_from_user(&params, user_params))
return -EFAULT;
if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
return -E2BIG;
String path;
{
auto path_arg = get_syscall_path_argument(params.path);
if (path_arg.is_error())
return path_arg.error();
path = path_arg.value();
}
auto copy_user_strings = [](const auto& list, auto& output) {
if (!list.length)
return true;
Checked size = sizeof(list.length);
size *= list.length;
if (size.has_overflow())
return false;
Vector<Syscall::StringArgument, 32> strings;
strings.resize(list.length);
if (!copy_from_user(strings.data(), list.strings, list.length * sizeof(Syscall::StringArgument)))
return false;
for (size_t i = 0; i < list.length; ++i) {
auto string = copy_string_from_user(strings[i]);
if (string.is_null())
return false;
output.append(move(string));
}
return true;
};
Vector<String> arguments;
if (!copy_user_strings(params.arguments, arguments))
return -EFAULT;
Vector<String> environment;
if (!copy_user_strings(params.environment, environment))
return -EFAULT;
int rc = exec(move(path), move(arguments), move(environment));
ASSERT(rc < 0); // We should never continue after a successful exec!
return rc;
}
}