From 8accc92c3c060e5f3d236bc6ee7de90e64900ace Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Fri, 2 Nov 2018 20:41:58 +0100 Subject: [PATCH] Implement fork()! This is quite cool! The syscall entry point plumbs the register dump down to sys$fork(), which uses it to set up the child process's TSS in order to resume execution right after the int 0x80 fork() call. :^) This works pretty well, although there is some problem with the kernel alias mappings used to clone the parent process's regions. If I disable the MM::release_page_directory() code, there's no problem. Probably there's a premature freeing of a physical page somehow. --- Kernel/MemoryManager.cpp | 20 +++ Kernel/MemoryManager.h | 2 + Kernel/Process.cpp | 205 ++++++++++++++++++++----------- Kernel/Process.h | 7 +- Kernel/Syscall.cpp | 7 +- Kernel/Syscall.h | 1 + Kernel/kmalloc.cpp | 10 ++ Kernel/sync.sh | 1 + LibC/unistd.cpp | 5 + LibC/unistd.h | 1 + Userland/.gitignore | 1 + Userland/Makefile | 5 + Userland/ft.cpp | 14 +++ Userland/sh.cpp | 12 ++ VirtualFileSystem/FileHandle.cpp | 13 ++ VirtualFileSystem/FileHandle.h | 2 + 16 files changed, 228 insertions(+), 78 deletions(-) create mode 100644 Userland/ft.cpp diff --git a/Kernel/MemoryManager.cpp b/Kernel/MemoryManager.cpp index bdea62f373..4434ef54cf 100644 --- a/Kernel/MemoryManager.cpp +++ b/Kernel/MemoryManager.cpp @@ -407,3 +407,23 @@ bool MemoryManager::validate_user_write(const Process& process, LinearAddress la return false; return true; } + +RetainPtr Region::clone() +{ + InterruptDisabler disabler; + KernelPagingScope pagingScope; + + // FIXME: Implement COW regions. + auto clone_zone = MM.createZone(zone->size()); + auto clone_region = adopt(*new Region(linearAddress, size, move(clone_zone), String(name))); + + // FIXME: It would be cool to make the src_alias a read-only mapping. + byte* src_alias = MM.create_kernel_alias_for_region(*this); + byte* dest_alias = MM.create_kernel_alias_for_region(*clone_region); + + memcpy(dest_alias, src_alias, size); + + MM.remove_kernel_alias_for_region(*clone_region, dest_alias); + MM.remove_kernel_alias_for_region(*this, src_alias); + return clone_region; +} diff --git a/Kernel/MemoryManager.h b/Kernel/MemoryManager.h index f86e1e5ea0..5b893f4184 100644 --- a/Kernel/MemoryManager.h +++ b/Kernel/MemoryManager.h @@ -40,6 +40,8 @@ private: struct Region : public Retainable { Region(LinearAddress, size_t, RetainPtr&&, String&&); ~Region(); + + RetainPtr clone(); LinearAddress linearAddress; size_t size { 0 }; RetainPtr zone; diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp index 96df47c99f..1c634f3820 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -105,30 +105,6 @@ void Process::initialize() loadTaskRegister(s_kernelProcess->selector()); } -void Process::allocateLDT() -{ - ASSERT(!m_tss.ldt); - static const WORD numLDTEntries = 4; - m_ldt_selector = gdt_alloc_entry(); - m_ldtEntries = new Descriptor[numLDTEntries]; -#if 0 - kprintf("new ldt selector = %x\n", m_ldt_selector); - kprintf("new ldt table at = %p\n", m_ldtEntries); - kprintf("new ldt table size = %u\n", (numLDTEntries * 8) - 1); -#endif - Descriptor& ldt = getGDTEntry(m_ldt_selector); - ldt.setBase(m_ldtEntries); - ldt.setLimit(numLDTEntries * 8 - 1); - ldt.dpl = 0; - ldt.segment_present = 1; - ldt.granularity = 0; - ldt.zero = 0; - ldt.operation_size = 1; - ldt.descriptor_type = 0; - ldt.type = Descriptor::LDT; - m_tss.ldt = m_ldt_selector; -} - template static void forEachProcess(Callback callback) { @@ -235,6 +211,77 @@ int Process::sys$gethostname(char* buffer, size_t size) return 0; } +Process* Process::fork(RegisterDump& regs) +{ + auto* child = new Process(String(m_name), m_uid, m_gid, m_pid, m_ring, m_cwd.copyRef(), m_executable.copyRef(), m_tty, this); +#ifdef FORK_DEBUG + dbgprintf("fork: child=%p\n", child); +#endif + +#if 0 + // FIXME: An honest fork() would copy these. Needs a Vector copy ctor. + child->m_arguments = m_arguments; + child->m_initialEnvironment = m_initialEnvironment; +#endif + + for (auto& region : m_regions) { +#ifdef FORK_DEBUG + dbgprintf("fork: cloning Region{%p}\n", region.ptr()); +#endif + auto cloned_region = region->clone(); + // FIXME: Move subregions into Region? + for (auto& subregion : m_subregions) { + if (subregion->region.ptr() != region.ptr()) + continue; +#ifdef FORK_DEBUG + dbgprintf("fork: cloning Subregion{%p}\n", subregion.ptr()); +#endif + auto cloned_subregion = make(*cloned_region, subregion->offset, subregion->size, subregion->linearAddress, String(subregion->name)); + child->m_subregions.append(move(cloned_subregion)); + MM.mapSubregion(*child, *child->m_subregions.last()); + } + child->m_regions.append(move(cloned_region)); + MM.mapRegion(*child, *child->m_regions.last()); + } + + child->m_tss.eax = 0; // fork() returns 0 in the child :^) + child->m_tss.ebx = regs.ebx; + child->m_tss.ecx = regs.ecx; + child->m_tss.edx = regs.edx; + child->m_tss.ebp = regs.ebp; + child->m_tss.esp = regs.esp_if_crossRing; + child->m_tss.esi = regs.esi; + child->m_tss.edi = regs.edi; + child->m_tss.eflags = regs.eflags; + child->m_tss.eip = regs.eip; + child->m_tss.cs = regs.cs; + child->m_tss.ds = regs.ds; + child->m_tss.es = regs.es; + child->m_tss.fs = regs.fs; + child->m_tss.gs = regs.gs; + child->m_tss.ss = regs.ss_if_crossRing; + +#ifdef FORK_DEBUG + dbgprintf("fork: child will begin executing at %w:%x with stack %w:%x\n", child->m_tss.cs, child->m_tss.eip, child->m_tss.ss, child->m_tss.esp); +#endif + + ProcFileSystem::the().addProcess(*child); + + s_processes->prepend(child); + system.nprocess++; +#ifdef TASK_DEBUG + kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child->m_tss.eip); +#endif + return child; +} + +pid_t Process::sys$fork(RegisterDump& regs) +{ + auto* child = fork(regs); + ASSERT(child); + return child->pid(); +} + int Process::sys$spawn(const char* path, const char** args) { if (args) { @@ -413,9 +460,9 @@ Process* Process::createKernelProcess(void (*e)(), String&& name) return process; } -Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel ring, RetainPtr&& cwd, RetainPtr&& executable, TTY* tty) +Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel ring, RetainPtr&& cwd, RetainPtr&& executable, TTY* tty, Process* fork_parent) : m_name(move(name)) - , m_pid(next_pid++) + , m_pid(next_pid++) // FIXME: RACE: This variable looks racy! , m_uid(uid) , m_gid(gid) , m_state(Runnable) @@ -425,57 +472,71 @@ Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel , m_tty(tty) , m_parentPID(parentPID) { - { + if (fork_parent) { + m_sid = fork_parent->m_sid; + m_pgid = fork_parent->m_pgid; + } else { // FIXME: Use a ProcessHandle? Presumably we're executing *IN* the parent right now though.. InterruptDisabler disabler; if (auto* parent = Process::fromPID(m_parentPID)) { m_sid = parent->m_sid; m_pgid = parent->m_pgid; } - } m_page_directory = (PageDirectory*)kmalloc_page_aligned(sizeof(PageDirectory)); MM.populate_page_directory(*this); - m_file_descriptors.resize(m_max_open_file_descriptors); - - if (tty) { - m_file_descriptors[0] = tty->open(O_RDONLY); - m_file_descriptors[1] = tty->open(O_WRONLY); - m_file_descriptors[2] = tty->open(O_WRONLY); - } - - m_nextRegion = LinearAddress(0x10000000); - - memset(&m_tss, 0, sizeof(m_tss)); - - if (isRing3()) { - memset(&m_ldtEntries, 0, sizeof(m_ldtEntries)); - allocateLDT(); - } - - // Only IF is set when a process boots. - m_tss.eflags = 0x0202; - - word cs, ds, ss; - - if (isRing0()) { - cs = 0x08; - ds = 0x10; - ss = 0x10; + if (fork_parent) { + m_file_descriptors.resize(fork_parent->m_file_descriptors.size()); + for (size_t i = 0; i < fork_parent->m_file_descriptors.size(); ++i) { + if (!fork_parent->m_file_descriptors[i]) + continue; +#ifdef FORK_DEBUG + dbgprintf("fork: cloning fd %u... (%p) istty? %um\n", i, fork_parent->m_file_descriptors[i].ptr(), fork_parent->m_file_descriptors[i]->isTTY()); +#endif + m_file_descriptors[i] = fork_parent->m_file_descriptors[i]->clone(); + } } else { - cs = 0x1b; - ds = 0x23; - ss = 0x23; + m_file_descriptors.resize(m_max_open_file_descriptors); + if (tty) { + m_file_descriptors[0] = tty->open(O_RDONLY); + m_file_descriptors[1] = tty->open(O_WRONLY); + m_file_descriptors[2] = tty->open(O_WRONLY); + } } - m_tss.ds = ds; - m_tss.es = ds; - m_tss.fs = ds; - m_tss.gs = ds; - m_tss.ss = ss; - m_tss.cs = cs; + if (fork_parent) + m_nextRegion = fork_parent->m_nextRegion; + else + m_nextRegion = LinearAddress(0x10000000); + + if (fork_parent) { + memcpy(&m_tss, &fork_parent->m_tss, sizeof(m_tss)); + } else { + memset(&m_tss, 0, sizeof(m_tss)); + + // Only IF is set when a process boots. + m_tss.eflags = 0x0202; + word cs, ds, ss; + + if (isRing0()) { + cs = 0x08; + ds = 0x10; + ss = 0x10; + } else { + cs = 0x1b; + ds = 0x23; + ss = 0x23; + } + + m_tss.ds = ds; + m_tss.es = ds; + m_tss.fs = ds; + m_tss.gs = ds; + m_tss.ss = ss; + m_tss.cs = cs; + } m_tss.cr3 = (dword)m_page_directory; @@ -486,10 +547,14 @@ Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel m_stackTop0 = (stackBottom + defaultStackSize) & 0xffffff8; m_tss.esp = m_stackTop0; } else { - auto* region = allocateRegion(defaultStackSize, "stack"); - ASSERT(region); - m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8; - m_tss.esp = m_stackTop3; + if (fork_parent) { + m_stackTop3 = fork_parent->m_stackTop3; + } else { + auto* region = allocateRegion(defaultStackSize, "stack"); + ASSERT(region); + m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8; + m_tss.esp = m_stackTop3; + } } if (isRing3()) { @@ -511,12 +576,6 @@ Process::~Process() ProcFileSystem::the().removeProcess(*this); system.nprocess--; - if (isRing3()) { - delete [] m_ldtEntries; - m_ldtEntries = nullptr; - gdt_free_entry(m_ldt_selector); - } - gdt_free_entry(selector()); if (m_kernelStack) { diff --git a/Kernel/Process.h b/Kernel/Process.h index 7ac558e7d2..2ee01dd07a 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -119,6 +119,7 @@ public: int sys$uname(utsname*); int sys$readlink(const char*, char*, size_t); int sys$ttyname_r(int fd, char*, size_t); + pid_t sys$fork(RegisterDump&); static void initialize(); @@ -155,11 +156,13 @@ public: void send_signal(int signal, Process* sender); + Process* fork(RegisterDump&); + private: friend class MemoryManager; friend bool scheduleNewProcess(); - Process(String&& name, uid_t, gid_t, pid_t parentPID, RingLevel, RetainPtr&& cwd = nullptr, RetainPtr&& executable = nullptr, TTY* = nullptr); + Process(String&& name, uid_t, gid_t, pid_t parentPID, RingLevel, RetainPtr&& cwd = nullptr, RetainPtr&& executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr); void allocateLDT(); @@ -183,8 +186,6 @@ private: State m_state { Invalid }; DWORD m_wakeupTime { 0 }; TSS32 m_tss; - word m_ldt_selector { 0 }; - Descriptor* m_ldtEntries { nullptr }; Vector> m_file_descriptors; RingLevel m_ring { Ring0 }; int m_error { 0 }; diff --git a/Kernel/Syscall.cpp b/Kernel/Syscall.cpp index 0ffbe7659e..353dcbf1a9 100644 --- a/Kernel/Syscall.cpp +++ b/Kernel/Syscall.cpp @@ -43,7 +43,7 @@ void initialize() kprintf("syscall: int 0x80 handler installed\n"); } -DWORD handle(DWORD function, DWORD arg1, DWORD arg2, DWORD arg3) +static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2, DWORD arg3) { ASSERT_INTERRUPTS_ENABLED(); switch (function) { @@ -128,6 +128,8 @@ DWORD handle(DWORD function, DWORD arg1, DWORD arg2, DWORD arg3) return current->sys$tcgetpgrp((int)arg1); case Syscall::PosixTcsetpgrp: return current->sys$tcsetpgrp((int)arg1, (pid_t)arg2); + case Syscall::PosixFork: + return current->sys$fork(regs); default: kprintf("<%u> int0x80: Unknown function %x requested {%x, %x, %x}\n", current->pid(), function, arg1, arg2, arg3); break; @@ -143,5 +145,6 @@ void syscall_entry(RegisterDump& regs) DWORD arg1 = regs.edx; DWORD arg2 = regs.ecx; DWORD arg3 = regs.ebx; - regs.eax = Syscall::handle(function, arg1, arg2, arg3); + regs.eax = Syscall::handle(regs, function, arg1, arg2, arg3); } + diff --git a/Kernel/Syscall.h b/Kernel/Syscall.h index 97116c16f9..8b4208f81e 100644 --- a/Kernel/Syscall.h +++ b/Kernel/Syscall.h @@ -47,6 +47,7 @@ enum Function { PosixGetpgrp = 0x2015, PosixTcsetpgrp = 0x2016, PosixTcgetpgrp = 0x2017, + PosixFork = 0x2018, }; void initialize(); diff --git a/Kernel/kmalloc.cpp b/Kernel/kmalloc.cpp index e9c6cf286c..4cf5b58d67 100644 --- a/Kernel/kmalloc.cpp +++ b/Kernel/kmalloc.cpp @@ -26,6 +26,8 @@ typedef struct #define ETERNAL_BASE_PHYSICAL 0x200000 #define BASE_PHYS 0x100000 +#define RANGE_SIZE 0x100000 + PRIVATE BYTE alloc_map[POOL_SIZE / CHUNK_SIZE / 8]; volatile DWORD sum_alloc = 0; @@ -36,6 +38,9 @@ volatile size_t kmalloc_sum_page_aligned = 0; static byte* s_next_eternal_ptr; static byte* s_next_page_aligned_ptr; +static byte* s_end_of_eternal_range; +static byte* s_end_of_page_aligned_range; + bool is_kmalloc_address(void* ptr) { if (ptr >= (byte*)ETERNAL_BASE_PHYSICAL && ptr < s_next_eternal_ptr) @@ -58,12 +63,16 @@ kmalloc_init() s_next_eternal_ptr = (byte*)ETERNAL_BASE_PHYSICAL; s_next_page_aligned_ptr = (byte*)PAGE_ALIGNED_BASE_PHYSICAL; + + s_end_of_eternal_range = s_next_eternal_ptr + RANGE_SIZE; + s_end_of_page_aligned_range = s_next_page_aligned_ptr + RANGE_SIZE; } void* kmalloc_eternal(size_t size) { void* ptr = s_next_eternal_ptr; s_next_eternal_ptr += size; + ASSERT(s_next_eternal_ptr < s_end_of_eternal_range); kmalloc_sum_eternal += size; return ptr; } @@ -73,6 +82,7 @@ void* kmalloc_page_aligned(size_t size) ASSERT((size % 4096) == 0); void* ptr = s_next_page_aligned_ptr; s_next_page_aligned_ptr += size; + ASSERT(s_next_page_aligned_ptr < s_end_of_page_aligned_range); kmalloc_sum_page_aligned += size; return ptr; } diff --git a/Kernel/sync.sh b/Kernel/sync.sh index ae4d5b03ec..3248ac1d6e 100755 --- a/Kernel/sync.sh +++ b/Kernel/sync.sh @@ -18,6 +18,7 @@ cp ../Userland/cat mnt/bin/cat cp ../Userland/uname mnt/bin/uname cp ../Userland/clear mnt/bin/clear cp ../Userland/tst mnt/bin/tst +cp ../Userland/ft mnt/bin/ft cp ../Userland/mm mnt/bin/mm cp ../Userland/kill mnt/bin/kill cp ../Userland/tty mnt/bin/tty diff --git a/LibC/unistd.cpp b/LibC/unistd.cpp index 3492430198..9a3b2459b8 100644 --- a/LibC/unistd.cpp +++ b/LibC/unistd.cpp @@ -5,6 +5,11 @@ extern "C" { +pid_t fork() +{ + return Syscall::invoke(Syscall::PosixFork); +} + uid_t getuid() { return Syscall::invoke(Syscall::PosixGetuid); diff --git a/LibC/unistd.h b/LibC/unistd.h index cf2df7e97d..205002dff8 100644 --- a/LibC/unistd.h +++ b/LibC/unistd.h @@ -8,6 +8,7 @@ __BEGIN_DECLS extern char** environ; inline int getpagesize() { return 4096; } +pid_t fork(); pid_t getsid(pid_t); pid_t setsid(); int setpgid(pid_t pid, pid_t pgid); diff --git a/Userland/.gitignore b/Userland/.gitignore index 77d7e70b48..d116c680f7 100644 --- a/Userland/.gitignore +++ b/Userland/.gitignore @@ -16,3 +16,4 @@ tst mm kill tty +ft diff --git a/Userland/Makefile b/Userland/Makefile index c94558e2d6..91c8f308c7 100644 --- a/Userland/Makefile +++ b/Userland/Makefile @@ -14,6 +14,7 @@ OBJS = \ tst.o \ mm.o \ kill.o \ + ft.o \ tty.o APPS = \ @@ -32,6 +33,7 @@ APPS = \ tst \ mm \ kill \ + ft \ tty ARCH_FLAGS = @@ -91,6 +93,9 @@ clear: clear.o tst: tst.o $(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a +ft: ft.o + $(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a + mm: mm.o $(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a diff --git a/Userland/ft.cpp b/Userland/ft.cpp new file mode 100644 index 0000000000..3c66b2b41e --- /dev/null +++ b/Userland/ft.cpp @@ -0,0 +1,14 @@ +#include +#include + +int main(int argc, char** argv) +{ + printf("Testing fork()...\n"); + pid_t pid = fork(); + if (!pid) { + printf("child, pid=%d\n", getpid()); + } else { + printf("parent, child pid=%d\n", pid); + } + return 0; +} diff --git a/Userland/sh.cpp b/Userland/sh.cpp index 524658e27f..6efa9ce4de 100644 --- a/Userland/sh.cpp +++ b/Userland/sh.cpp @@ -32,6 +32,13 @@ static int sh_pwd(int, const char**) return 0; } +static int sh_fork(int, const char**) +{ + pid_t pid = fork(); + printf("getpid()=%d, fork()=%d\n", getpid(), pid); + return 0; +} + static int sh_exit(int, const char**) { printf("Good-bye!\n"); @@ -94,6 +101,11 @@ static bool handle_builtin(int argc, const char** argv, int& retval) retval = sh_exit(argc, argv); return true; } + + if (!strcmp(argv[0], "fork")) { + retval = sh_fork(argc, argv); + return true; + } return false; } diff --git a/VirtualFileSystem/FileHandle.cpp b/VirtualFileSystem/FileHandle.cpp index 49199f6b22..f56b43cd6b 100644 --- a/VirtualFileSystem/FileHandle.cpp +++ b/VirtualFileSystem/FileHandle.cpp @@ -15,6 +15,19 @@ FileHandle::~FileHandle() { } +OwnPtr FileHandle::clone() +{ + auto handle = make(m_vnode.copyRef()); + if (!handle) + return nullptr; + handle->m_currentOffset = m_currentOffset; +#ifdef SERENITY + handle->m_fd = m_fd; + handle->m_isBlocking = m_isBlocking; +#endif + return handle; +} + #ifndef SERENITY bool additionWouldOverflow(Unix::off_t a, Unix::off_t b) { diff --git a/VirtualFileSystem/FileHandle.h b/VirtualFileSystem/FileHandle.h index bf6a842891..1a9757e3ae 100644 --- a/VirtualFileSystem/FileHandle.h +++ b/VirtualFileSystem/FileHandle.h @@ -11,6 +11,8 @@ public: explicit FileHandle(RetainPtr&&); ~FileHandle(); + OwnPtr clone(); + int close(); Unix::off_t seek(Unix::off_t, int whence);