1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-20 11:45:06 +00:00
serenity/Kernel/Syscalls/execve.cpp
Tom c8d9f1b9c9 Kernel: Make copy_to/from_user safe and remove unnecessary checks
Since the CPU already does almost all necessary validation steps
for us, we don't really need to attempt to do this. Doing it
ourselves doesn't really work very reliably, because we'd have to
account for other processors modifying virtual memory, and we'd
have to account for e.g. pages not being able to be allocated
due to insufficient resources.

So change the copy_to/from_user (and associated helper functions)
to use the new safe_memcpy, which will return whether it succeeded
or not. The only manual validation step needed (which the CPU
can't perform for us) is making sure the pointers provided by user
mode aren't pointing to kernel mappings.

To make it easier to read/write from/to either kernel or user mode
data add the UserOrKernelBuffer helper class, which will internally
either use copy_from/to_user or directly memcpy, or pass the data
through directly using a temporary buffer on the stack.

Last but not least we need to keep syscall params trivial as we
need to copy them from/to user mode using copy_from/to_user.
2020-09-13 21:19:15 +02:00

608 lines
24 KiB
C++

/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/ScopeGuard.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/Process.h>
#include <Kernel/Profiling.h>
#include <Kernel/Random.h>
#include <Kernel/Time/TimeManagement.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>
#include <Kernel/VM/Region.h>
#include <Kernel/VM/SharedInodeVMObject.h>
#include <LibC/limits.h>
#include <LibELF/Loader.h>
#include <LibELF/Validation.h>
//#define EXEC_DEBUG
//#define MM_DEBUG
namespace Kernel {
int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Vector<String> arguments, Vector<String> environment, RefPtr<FileDescription> interpreter_description, Thread*& new_main_thread, u32& prev_flags)
{
ASSERT(is_user_process());
ASSERT(!Processor::current().in_critical());
auto path = main_program_description->absolute_path();
#ifdef EXEC_DEBUG
dbg() << "do_exec(" << path << ")";
#endif
size_t total_blob_size = 0;
for (auto& a : arguments)
total_blob_size += a.length() + 1;
for (auto& e : environment)
total_blob_size += e.length() + 1;
size_t total_meta_size = sizeof(char*) * (arguments.size() + 1) + sizeof(char*) * (environment.size() + 1);
// FIXME: How much stack space does process startup need?
if ((total_blob_size + total_meta_size) >= Thread::default_userspace_stack_size)
return -E2BIG;
auto parts = path.split('/');
if (parts.is_empty())
return -ENOENT;
auto& inode = interpreter_description ? *interpreter_description->inode() : *main_program_description->inode();
auto vmobject = SharedInodeVMObject::create_with_inode(inode);
if (static_cast<const SharedInodeVMObject&>(*vmobject).writable_mappings()) {
dbg() << "Refusing to execute a write-mapped program";
return -ETXTBSY;
}
// Disable profiling temporarily in case it's running on this process.
bool was_profiling = is_profiling();
TemporaryChange profiling_disabler(m_profiling, false);
// Mark this thread as the current thread that does exec
// No other thread from this process will be scheduled to run
auto current_thread = Thread::current();
m_exec_tid = current_thread->tid();
RefPtr<PageDirectory> old_page_directory;
NonnullOwnPtrVector<Region> old_regions;
{
// Need to make sure we don't swap contexts in the middle
ScopedCritical critical;
old_page_directory = move(m_page_directory);
old_regions = move(m_regions);
m_page_directory = PageDirectory::create_for_userspace(*this);
}
#ifdef MM_DEBUG
dbg() << "Process " << pid().value() << " exec: PD=" << m_page_directory.ptr() << " created";
#endif
InodeMetadata loader_metadata;
// FIXME: Hoooo boy this is a hack if I ever saw one.
// This is the 'random' offset we're giving to our ET_DYN exectuables to start as.
// It also happens to be the static Virtual Addresss offset every static exectuable gets :)
// Without this, some assumptions by the ELF loading hooks below are severely broken.
// 0x08000000 is a verified random number chosen by random dice roll https://xkcd.com/221/
m_load_offset = interpreter_description ? 0x08000000 : 0;
// FIXME: We should be able to load both the PT_INTERP interpreter and the main program... once the RTLD is smart enough
if (interpreter_description) {
loader_metadata = interpreter_description->metadata();
// we don't need the interpreter file desciption after we've loaded (or not) it into memory
interpreter_description = nullptr;
} else {
loader_metadata = main_program_description->metadata();
}
auto region = MM.allocate_kernel_region_with_vmobject(*vmobject, PAGE_ROUND_UP(loader_metadata.size), "ELF loading", Region::Access::Read);
if (!region)
return -ENOMEM;
Region* master_tls_region { nullptr };
size_t master_tls_size = 0;
size_t master_tls_alignment = 0;
m_entry_eip = 0;
MM.enter_process_paging_scope(*this);
RefPtr<ELF::Loader> loader;
{
ArmedScopeGuard rollback_regions_guard([&]() {
ASSERT(Process::current() == this);
// Need to make sure we don't swap contexts in the middle
ScopedCritical critical;
m_page_directory = move(old_page_directory);
m_regions = move(old_regions);
MM.enter_process_paging_scope(*this);
});
loader = ELF::Loader::create(region->vaddr().as_ptr(), loader_metadata.size);
// Load the correct executable -- either interp or main program.
// FIXME: Once we actually load both interp and main, we'll need to be more clever about this.
// In that case, both will be ET_DYN objects, so they'll both be completely relocatable.
// That means, we can put them literally anywhere in User VM space (ASLR anyone?).
// ALSO FIXME: Reminder to really really fix that 'totally random offset' business.
loader->map_section_hook = [&](VirtualAddress vaddr, size_t size, size_t alignment, size_t offset_in_image, bool is_readable, bool is_writable, bool is_executable, const String& name) -> u8* {
ASSERT(size);
ASSERT(alignment == PAGE_SIZE);
int prot = 0;
if (is_readable)
prot |= PROT_READ;
if (is_writable)
prot |= PROT_WRITE;
if (is_executable)
prot |= PROT_EXEC;
if (auto* region = allocate_region_with_vmobject(vaddr.offset(m_load_offset), size, *vmobject, offset_in_image, String(name), prot)) {
region->set_shared(true);
return region->vaddr().as_ptr();
}
return nullptr;
};
loader->alloc_section_hook = [&](VirtualAddress vaddr, size_t size, size_t alignment, bool is_readable, bool is_writable, const String& name) -> u8* {
ASSERT(size);
ASSERT(alignment == PAGE_SIZE);
int prot = 0;
if (is_readable)
prot |= PROT_READ;
if (is_writable)
prot |= PROT_WRITE;
if (auto* region = allocate_region(vaddr.offset(m_load_offset), size, String(name), prot))
return region->vaddr().as_ptr();
return nullptr;
};
// FIXME: Move TLS region allocation to userspace: LibC and the dynamic loader.
// LibC if we end up with a statically linked executable, and the
// dynamic loader so that it can create new TLS blocks for each shared libarary
// that gets loaded as part of DT_NEEDED processing, and via dlopen()
// If that doesn't happen quickly, at least pass the location of the TLS region
// some ELF Auxilliary Vector so the loader can use it/create new ones as necessary.
loader->tls_section_hook = [&](size_t size, size_t alignment) {
ASSERT(size);
master_tls_region = allocate_region({}, size, String(), PROT_READ | PROT_WRITE);
master_tls_size = size;
master_tls_alignment = alignment;
return master_tls_region->vaddr().as_ptr();
};
ASSERT(!Processor::current().in_critical());
bool success = loader->load();
if (!success) {
klog() << "do_exec: Failure loading " << path.characters();
return -ENOEXEC;
}
// FIXME: Validate that this virtual address is within executable region,
// instead of just non-null. You could totally have a DSO with entry point of
// the beginning of the text segement.
if (!loader->entry().offset(m_load_offset).get()) {
klog() << "do_exec: Failure loading " << path.characters() << ", entry pointer is invalid! (" << loader->entry().offset(m_load_offset) << ")";
return -ENOEXEC;
}
rollback_regions_guard.disarm();
// NOTE: At this point, we've committed to the new executable.
m_entry_eip = loader->entry().offset(m_load_offset).get();
kill_threads_except_self();
#ifdef EXEC_DEBUG
klog() << "Memory layout after ELF load:";
dump_regions();
#endif
}
m_executable = main_program_description->custody();
m_promises = m_execpromises;
m_veil_state = VeilState::None;
m_unveiled_paths.clear();
// Copy of the master TLS region that we will clone for new threads
// FIXME: Handle this in userspace
m_master_tls_region = master_tls_region->make_weak_ptr();
auto main_program_metadata = main_program_description->metadata();
if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
if (main_program_metadata.is_setuid())
m_euid = m_suid = main_program_metadata.uid;
if (main_program_metadata.is_setgid())
m_egid = m_sgid = main_program_metadata.gid;
}
current_thread->set_default_signal_dispositions();
current_thread->clear_signals();
m_futex_queues.clear();
m_region_lookup_cache = {};
disown_all_shared_buffers();
for (size_t i = 0; i < m_fds.size(); ++i) {
auto& description_and_flags = m_fds[i];
if (description_and_flags.description() && description_and_flags.flags() & FD_CLOEXEC) {
// FIXME: Should this error path be observed somehow?
(void)description_and_flags.description()->close();
description_and_flags = {};
}
}
new_main_thread = nullptr;
if (&current_thread->process() == this) {
new_main_thread = current_thread;
} else {
for_each_thread([&](auto& thread) {
new_main_thread = &thread;
return IterationDecision::Break;
});
}
ASSERT(new_main_thread);
auto auxv = generate_auxiliary_vector();
// NOTE: We create the new stack before disabling interrupts since it will zero-fault
// and we don't want to deal with faults after this point.
u32 new_userspace_esp = new_main_thread->make_userspace_stack_for_main_thread(move(arguments), move(environment), move(auxv));
if (wait_for_tracer_at_next_execve())
Thread::current()->send_urgent_signal_to_self(SIGSTOP);
// We enter a critical section here because we don't want to get interrupted between do_exec()
// and Processor::assume_context() or the next context switch.
// If we used an InterruptDisabler that sti()'d on exit, we might timer tick'd too soon in exec().
Processor::current().enter_critical(prev_flags);
// NOTE: Be careful to not trigger any page faults below!
m_name = parts.take_last();
new_main_thread->set_name(m_name);
m_master_tls_size = master_tls_size;
m_master_tls_alignment = master_tls_alignment;
// FIXME: PID/TID ISSUE
m_pid = new_main_thread->tid().value();
new_main_thread->make_thread_specific_region({});
new_main_thread->reset_fpu_state();
auto& tss = new_main_thread->m_tss;
tss.cs = GDT_SELECTOR_CODE3 | 3;
tss.ds = GDT_SELECTOR_DATA3 | 3;
tss.es = GDT_SELECTOR_DATA3 | 3;
tss.ss = GDT_SELECTOR_DATA3 | 3;
tss.fs = GDT_SELECTOR_DATA3 | 3;
tss.gs = GDT_SELECTOR_TLS | 3;
tss.eip = m_entry_eip;
tss.esp = new_userspace_esp;
tss.cr3 = m_page_directory->cr3();
tss.ss2 = m_pid.value();
if (was_profiling)
Profiling::did_exec(path);
new_main_thread->set_state(Thread::State::Runnable);
big_lock().force_unlock_if_locked();
ASSERT_INTERRUPTS_DISABLED();
ASSERT(Processor::current().in_critical());
return 0;
}
Vector<AuxiliaryValue> Process::generate_auxiliary_vector() const
{
Vector<AuxiliaryValue> auxv;
// PHDR/EXECFD
// PH*
auxv.append({ AuxiliaryValue::PageSize, PAGE_SIZE });
auxv.append({ AuxiliaryValue::BaseAddress, (void*)m_load_offset });
// FLAGS
auxv.append({ AuxiliaryValue::Entry, (void*)m_entry_eip });
// NOTELF
auxv.append({ AuxiliaryValue::Uid, (long)m_uid });
auxv.append({ AuxiliaryValue::EUid, (long)m_euid });
auxv.append({ AuxiliaryValue::Gid, (long)m_gid });
auxv.append({ AuxiliaryValue::EGid, (long)m_egid });
// FIXME: Don't hard code this? We might support other platforms later.. (e.g. x86_64)
auxv.append({ AuxiliaryValue::Platform, "i386" });
// FIXME: This is platform specific
auxv.append({ AuxiliaryValue::HwCap, (long)CPUID(1).edx() });
auxv.append({ AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() });
// FIXME: Also take into account things like extended filesystem permissions? That's what linux does...
auxv.append({ AuxiliaryValue::Secure, ((m_uid != m_euid) || (m_gid != m_egid)) ? 1 : 0 });
char random_bytes[16] {};
get_fast_random_bytes((u8*)random_bytes, sizeof(random_bytes));
auxv.append({ AuxiliaryValue::Random, String(random_bytes, sizeof(random_bytes)) });
auxv.append({ AuxiliaryValue::ExecFilename, m_executable->absolute_path() });
auxv.append({ AuxiliaryValue::Null, 0L });
return auxv;
}
static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread)
{
int word_start = 2;
int word_length = 0;
if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
Vector<String> interpreter_words;
for (int i = 2; i < nread; ++i) {
if (first_page[i] == '\n') {
break;
}
if (first_page[i] != ' ') {
++word_length;
}
if (first_page[i] == ' ') {
if (word_length > 0) {
interpreter_words.append(String(&first_page[word_start], word_length));
}
word_length = 0;
word_start = i + 1;
}
}
if (word_length > 0)
interpreter_words.append(String(&first_page[word_start], word_length));
if (!interpreter_words.is_empty())
return interpreter_words;
}
return KResult(-ENOEXEC);
}
KResultOr<NonnullRefPtr<FileDescription>> Process::find_elf_interpreter_for_executable(const String& path, char (&first_page)[PAGE_SIZE], int nread, size_t file_size)
{
if (nread < (int)sizeof(Elf32_Ehdr))
return KResult(-ENOEXEC);
auto elf_header = (Elf32_Ehdr*)first_page;
if (!ELF::validate_elf_header(*elf_header, file_size)) {
dbg() << "exec(" << path << "): File has invalid ELF header";
return KResult(-ENOEXEC);
}
// Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
String interpreter_path;
if (!ELF::validate_program_headers(*elf_header, file_size, (u8*)first_page, nread, interpreter_path)) {
dbg() << "exec(" << path << "): File has invalid ELF Program headers";
return KResult(-ENOEXEC);
}
if (!interpreter_path.is_empty()) {
// Programs with an interpreter better be relocatable executables or we don't know what to do...
if (elf_header->e_type != ET_DYN)
return KResult(-ENOEXEC);
dbg() << "exec(" << path << "): Using program interpreter " << interpreter_path;
auto interp_result = VFS::the().open(interpreter_path, O_EXEC, 0, current_directory());
if (interp_result.is_error()) {
dbg() << "exec(" << path << "): Unable to open program interpreter " << interpreter_path;
return interp_result.error();
}
auto interpreter_description = interp_result.value();
auto interp_metadata = interpreter_description->metadata();
ASSERT(interpreter_description->inode());
// Validate the program interpreter as a valid elf binary.
// If your program interpreter is a #! file or something, it's time to stop playing games :)
if (interp_metadata.size < (int)sizeof(Elf32_Ehdr))
return KResult(-ENOEXEC);
memset(first_page, 0, sizeof(first_page));
auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
auto nread_or_error = interpreter_description->read(first_page_buffer, sizeof(first_page));
if (nread_or_error.is_error())
return KResult(-ENOEXEC);
nread = nread_or_error.value();
if (nread < (int)sizeof(Elf32_Ehdr))
return KResult(-ENOEXEC);
elf_header = (Elf32_Ehdr*)first_page;
if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) {
dbg() << "exec(" << path << "): Interpreter (" << interpreter_description->absolute_path() << ") has invalid ELF header";
return KResult(-ENOEXEC);
}
// Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
String interpreter_interpreter_path;
if (!ELF::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, interpreter_interpreter_path)) {
dbg() << "exec(" << path << "): Interpreter (" << interpreter_description->absolute_path() << ") has invalid ELF Program headers";
return KResult(-ENOEXEC);
}
if (!interpreter_interpreter_path.is_empty()) {
dbg() << "exec(" << path << "): Interpreter (" << interpreter_description->absolute_path() << ") has its own interpreter (" << interpreter_interpreter_path << ")! No thank you!";
return KResult(-ELOOP);
}
return interpreter_description;
}
if (elf_header->e_type != ET_EXEC) {
// We can't exec an ET_REL, that's just an object file from the compiler
// If it's ET_DYN with no PT_INTERP, then we can't load it properly either
return KResult(-ENOEXEC);
}
// No interpreter, but, path refers to a valid elf image
return KResult(KSuccess);
}
int Process::exec(String path, Vector<String> arguments, Vector<String> environment, int recursion_depth)
{
if (recursion_depth > 2) {
dbg() << "exec(" << path << "): SHENANIGANS! recursed too far trying to find #! interpreter";
return -ELOOP;
}
// Open the file to check what kind of binary format it is
// Currently supported formats:
// - #! interpreted file
// - ELF32
// * ET_EXEC binary that just gets loaded
// * ET_DYN binary that requires a program interpreter
//
auto result = VFS::the().open(path, O_EXEC, 0, current_directory());
if (result.is_error())
return result.error();
auto description = result.value();
auto metadata = description->metadata();
// Always gonna need at least 3 bytes. these are for #!X
if (metadata.size < 3)
return -ENOEXEC;
ASSERT(description->inode());
// Read the first page of the program into memory so we can validate the binfmt of it
char first_page[PAGE_SIZE];
auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
auto nread_or_error = description->read(first_page_buffer, sizeof(first_page));
if (nread_or_error.is_error())
return -ENOEXEC;
// 1) #! interpreted file
auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread_or_error.value());
if (!shebang_result.is_error()) {
Vector<String> new_arguments(shebang_result.value());
new_arguments.append(path);
arguments.remove(0);
new_arguments.append(move(arguments));
return exec(shebang_result.value().first(), move(new_arguments), move(environment), ++recursion_depth);
}
// #2) ELF32 for i386
auto elf_result = find_elf_interpreter_for_executable(path, first_page, nread_or_error.value(), metadata.size);
RefPtr<FileDescription> interpreter_description;
// We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out)
if (!elf_result.is_error())
interpreter_description = elf_result.value();
else if (elf_result.error().is_error())
return elf_result.error();
// The bulk of exec() is done by do_exec(), which ensures that all locals
// are cleaned up by the time we yield-teleport below.
Thread* new_main_thread = nullptr;
u32 prev_flags = 0;
int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags);
m_exec_tid = 0;
if (rc < 0)
return rc;
ASSERT_INTERRUPTS_DISABLED();
ASSERT(Processor::current().in_critical());
auto current_thread = Thread::current();
if (current_thread == new_main_thread) {
// We need to enter the scheduler lock before changing the state
// and it will be released after the context switch into that
// thread. We should also still be in our critical section
ASSERT(!g_scheduler_lock.own_lock());
ASSERT(Processor::current().in_critical() == 1);
g_scheduler_lock.lock();
current_thread->set_state(Thread::State::Running);
Processor::assume_context(*current_thread, prev_flags);
ASSERT_NOT_REACHED();
}
Processor::current().leave_critical(prev_flags);
return 0;
}
int Process::sys$execve(Userspace<const Syscall::SC_execve_params*> user_params)
{
REQUIRE_PROMISE(exec);
// NOTE: Be extremely careful with allocating any kernel memory in exec().
// On success, the kernel stack will be lost.
Syscall::SC_execve_params params;
if (!copy_from_user(&params, user_params))
return -EFAULT;
if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
return -E2BIG;
if (wait_for_tracer_at_next_execve())
Thread::current()->send_urgent_signal_to_self(SIGSTOP);
String path;
{
auto path_arg = get_syscall_path_argument(params.path);
if (path_arg.is_error())
return path_arg.error();
path = path_arg.value();
}
auto copy_user_strings = [this](const auto& list, auto& output) {
if (!list.length)
return true;
Checked size = sizeof(list.length);
size *= list.length;
if (size.has_overflow())
return false;
Vector<Syscall::StringArgument, 32> strings;
strings.resize(list.length);
if (!copy_from_user(strings.data(), list.strings, list.length * sizeof(Syscall::StringArgument)))
return false;
for (size_t i = 0; i < list.length; ++i) {
auto string = copy_string_from_user(strings[i]);
if (string.is_null())
return false;
output.append(move(string));
}
return true;
};
Vector<String> arguments;
if (!copy_user_strings(params.arguments, arguments))
return -EFAULT;
Vector<String> environment;
if (!copy_user_strings(params.environment, environment))
return -EFAULT;
int rc = exec(move(path), move(arguments), move(environment));
ASSERT(rc < 0); // We should never continue after a successful exec!
return rc;
}
}