1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 08:08:12 +00:00

Implement fork()!

This is quite cool! The syscall entry point plumbs the register dump
down to sys$fork(), which uses it to set up the child process's TSS
in order to resume execution right after the int 0x80 fork() call. :^)

This works pretty well, although there is some problem with the kernel
alias mappings used to clone the parent process's regions. If I disable
the MM::release_page_directory() code, there's no problem. Probably there's
a premature freeing of a physical page somehow.
This commit is contained in:
Andreas Kling 2018-11-02 20:41:58 +01:00
parent 10b666f69a
commit 8accc92c3c
16 changed files with 228 additions and 78 deletions

View file

@ -407,3 +407,23 @@ bool MemoryManager::validate_user_write(const Process& process, LinearAddress la
return false; return false;
return true; return true;
} }
RetainPtr<Region> Region::clone()
{
    // Both guards stay alive for the entire copy and are released on return.
    InterruptDisabler disabler;
    KernelPagingScope pagingScope;

    // FIXME: Implement COW regions.
    // Back the duplicate with a fresh zone that is as large as this region's zone.
    auto fresh_zone = MM.createZone(zone->size());
    auto duplicate = adopt(*new Region(linearAddress, size, move(fresh_zone), String(name)));

    // Temporarily alias both regions into kernel space so their contents can
    // be copied byte for byte.
    // FIXME: It would be cool to make the source alias a read-only mapping.
    byte* source_window = MM.create_kernel_alias_for_region(*this);
    byte* target_window = MM.create_kernel_alias_for_region(*duplicate);

    memcpy(target_window, source_window, size);

    // Tear the aliases down again, destination first.
    MM.remove_kernel_alias_for_region(*duplicate, target_window);
    MM.remove_kernel_alias_for_region(*this, source_window);

    return duplicate;
}

View file

@ -40,6 +40,8 @@ private:
struct Region : public Retainable<Region> { struct Region : public Retainable<Region> {
Region(LinearAddress, size_t, RetainPtr<Zone>&&, String&&); Region(LinearAddress, size_t, RetainPtr<Zone>&&, String&&);
~Region(); ~Region();
RetainPtr<Region> clone();
LinearAddress linearAddress; LinearAddress linearAddress;
size_t size { 0 }; size_t size { 0 };
RetainPtr<Zone> zone; RetainPtr<Zone> zone;

View file

@ -105,30 +105,6 @@ void Process::initialize()
loadTaskRegister(s_kernelProcess->selector()); loadTaskRegister(s_kernelProcess->selector());
} }
void Process::allocateLDT()
{
ASSERT(!m_tss.ldt);
static const WORD numLDTEntries = 4;
m_ldt_selector = gdt_alloc_entry();
m_ldtEntries = new Descriptor[numLDTEntries];
#if 0
kprintf("new ldt selector = %x\n", m_ldt_selector);
kprintf("new ldt table at = %p\n", m_ldtEntries);
kprintf("new ldt table size = %u\n", (numLDTEntries * 8) - 1);
#endif
Descriptor& ldt = getGDTEntry(m_ldt_selector);
ldt.setBase(m_ldtEntries);
ldt.setLimit(numLDTEntries * 8 - 1);
ldt.dpl = 0;
ldt.segment_present = 1;
ldt.granularity = 0;
ldt.zero = 0;
ldt.operation_size = 1;
ldt.descriptor_type = 0;
ldt.type = Descriptor::LDT;
m_tss.ldt = m_ldt_selector;
}
template<typename Callback> template<typename Callback>
static void forEachProcess(Callback callback) static void forEachProcess(Callback callback)
{ {
@ -235,6 +211,77 @@ int Process::sys$gethostname(char* buffer, size_t size)
return 0; return 0;
} }
// Creates a child process that duplicates this process.
//
// The child is constructed with this process's name, uid, gid, ring level,
// cwd, executable and tty, with this process's pid as its parent pid, and
// with `this` passed as the fork parent. Every region of this process is
// cloned into the child, together with the subregions that refer to it.
// The child's TSS is then filled in from `regs` so that it resumes where
// the parent entered the syscall, except that eax is forced to 0.
//
// regs: register dump handed down from the syscall entry point.
// Returns the newly created child process.
Process* Process::fork(RegisterDump& regs)
{
auto* child = new Process(String(m_name), m_uid, m_gid, m_pid, m_ring, m_cwd.copyRef(), m_executable.copyRef(), m_tty, this);
#ifdef FORK_DEBUG
dbgprintf("fork: child=%p\n", child);
#endif
#if 0
// FIXME: An honest fork() would copy these. Needs a Vector copy ctor.
child->m_arguments = m_arguments;
child->m_initialEnvironment = m_initialEnvironment;
#endif
// Clone each region of the parent and hand the copy to the child.
for (auto& region : m_regions) {
#ifdef FORK_DEBUG
dbgprintf("fork: cloning Region{%p}\n", region.ptr());
#endif
auto cloned_region = region->clone();
// FIXME: Move subregions into Region?
// Recreate every subregion that points at this region, re-targeted at the
// clone but keeping the same offset, size, linear address and name.
for (auto& subregion : m_subregions) {
if (subregion->region.ptr() != region.ptr())
continue;
#ifdef FORK_DEBUG
dbgprintf("fork: cloning Subregion{%p}\n", subregion.ptr());
#endif
auto cloned_subregion = make<Subregion>(*cloned_region, subregion->offset, subregion->size, subregion->linearAddress, String(subregion->name));
child->m_subregions.append(move(cloned_subregion));
MM.mapSubregion(*child, *child->m_subregions.last());
}
child->m_regions.append(move(cloned_region));
MM.mapRegion(*child, *child->m_regions.last());
}
// Seed the child's TSS from the register dump. Only eax differs from the
// dumped state; esp and ss come from the *_if_crossRing fields of the dump.
child->m_tss.eax = 0; // fork() returns 0 in the child :^)
child->m_tss.ebx = regs.ebx;
child->m_tss.ecx = regs.ecx;
child->m_tss.edx = regs.edx;
child->m_tss.ebp = regs.ebp;
child->m_tss.esp = regs.esp_if_crossRing;
child->m_tss.esi = regs.esi;
child->m_tss.edi = regs.edi;
child->m_tss.eflags = regs.eflags;
child->m_tss.eip = regs.eip;
child->m_tss.cs = regs.cs;
child->m_tss.ds = regs.ds;
child->m_tss.es = regs.es;
child->m_tss.fs = regs.fs;
child->m_tss.gs = regs.gs;
child->m_tss.ss = regs.ss_if_crossRing;
#ifdef FORK_DEBUG
dbgprintf("fork: child will begin executing at %w:%x with stack %w:%x\n", child->m_tss.cs, child->m_tss.eip, child->m_tss.ss, child->m_tss.esp);
#endif
// Register the child with the proc filesystem, put it at the front of the
// global process list and bump the process count.
ProcFileSystem::the().addProcess(*child);
s_processes->prepend(child);
system.nprocess++;
#ifdef TASK_DEBUG
kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child->m_tss.eip);
#endif
return child;
}
// Syscall-facing wrapper around fork(): creates the child from the given
// register dump and reports the child's pid back to the calling process.
pid_t Process::sys$fork(RegisterDump& regs)
{
    Process* forked_process = fork(regs);
    // A missing child is treated as a kernel invariant violation.
    ASSERT(forked_process);
    return forked_process->pid();
}
int Process::sys$spawn(const char* path, const char** args) int Process::sys$spawn(const char* path, const char** args)
{ {
if (args) { if (args) {
@ -413,9 +460,9 @@ Process* Process::createKernelProcess(void (*e)(), String&& name)
return process; return process;
} }
Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel ring, RetainPtr<VirtualFileSystem::Node>&& cwd, RetainPtr<VirtualFileSystem::Node>&& executable, TTY* tty) Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel ring, RetainPtr<VirtualFileSystem::Node>&& cwd, RetainPtr<VirtualFileSystem::Node>&& executable, TTY* tty, Process* fork_parent)
: m_name(move(name)) : m_name(move(name))
, m_pid(next_pid++) , m_pid(next_pid++) // FIXME: RACE: This variable looks racy!
, m_uid(uid) , m_uid(uid)
, m_gid(gid) , m_gid(gid)
, m_state(Runnable) , m_state(Runnable)
@ -425,57 +472,71 @@ Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel
, m_tty(tty) , m_tty(tty)
, m_parentPID(parentPID) , m_parentPID(parentPID)
{ {
{ if (fork_parent) {
m_sid = fork_parent->m_sid;
m_pgid = fork_parent->m_pgid;
} else {
// FIXME: Use a ProcessHandle? Presumably we're executing *IN* the parent right now though.. // FIXME: Use a ProcessHandle? Presumably we're executing *IN* the parent right now though..
InterruptDisabler disabler; InterruptDisabler disabler;
if (auto* parent = Process::fromPID(m_parentPID)) { if (auto* parent = Process::fromPID(m_parentPID)) {
m_sid = parent->m_sid; m_sid = parent->m_sid;
m_pgid = parent->m_pgid; m_pgid = parent->m_pgid;
} }
} }
m_page_directory = (PageDirectory*)kmalloc_page_aligned(sizeof(PageDirectory)); m_page_directory = (PageDirectory*)kmalloc_page_aligned(sizeof(PageDirectory));
MM.populate_page_directory(*this); MM.populate_page_directory(*this);
m_file_descriptors.resize(m_max_open_file_descriptors); if (fork_parent) {
m_file_descriptors.resize(fork_parent->m_file_descriptors.size());
if (tty) { for (size_t i = 0; i < fork_parent->m_file_descriptors.size(); ++i) {
m_file_descriptors[0] = tty->open(O_RDONLY); if (!fork_parent->m_file_descriptors[i])
m_file_descriptors[1] = tty->open(O_WRONLY); continue;
m_file_descriptors[2] = tty->open(O_WRONLY); #ifdef FORK_DEBUG
} dbgprintf("fork: cloning fd %u... (%p) istty? %um\n", i, fork_parent->m_file_descriptors[i].ptr(), fork_parent->m_file_descriptors[i]->isTTY());
#endif
m_nextRegion = LinearAddress(0x10000000); m_file_descriptors[i] = fork_parent->m_file_descriptors[i]->clone();
}
memset(&m_tss, 0, sizeof(m_tss));
if (isRing3()) {
memset(&m_ldtEntries, 0, sizeof(m_ldtEntries));
allocateLDT();
}
// Only IF is set when a process boots.
m_tss.eflags = 0x0202;
word cs, ds, ss;
if (isRing0()) {
cs = 0x08;
ds = 0x10;
ss = 0x10;
} else { } else {
cs = 0x1b; m_file_descriptors.resize(m_max_open_file_descriptors);
ds = 0x23; if (tty) {
ss = 0x23; m_file_descriptors[0] = tty->open(O_RDONLY);
m_file_descriptors[1] = tty->open(O_WRONLY);
m_file_descriptors[2] = tty->open(O_WRONLY);
}
} }
m_tss.ds = ds; if (fork_parent)
m_tss.es = ds; m_nextRegion = fork_parent->m_nextRegion;
m_tss.fs = ds; else
m_tss.gs = ds; m_nextRegion = LinearAddress(0x10000000);
m_tss.ss = ss;
m_tss.cs = cs; if (fork_parent) {
memcpy(&m_tss, &fork_parent->m_tss, sizeof(m_tss));
} else {
memset(&m_tss, 0, sizeof(m_tss));
// Only IF is set when a process boots.
m_tss.eflags = 0x0202;
word cs, ds, ss;
if (isRing0()) {
cs = 0x08;
ds = 0x10;
ss = 0x10;
} else {
cs = 0x1b;
ds = 0x23;
ss = 0x23;
}
m_tss.ds = ds;
m_tss.es = ds;
m_tss.fs = ds;
m_tss.gs = ds;
m_tss.ss = ss;
m_tss.cs = cs;
}
m_tss.cr3 = (dword)m_page_directory; m_tss.cr3 = (dword)m_page_directory;
@ -486,10 +547,14 @@ Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel
m_stackTop0 = (stackBottom + defaultStackSize) & 0xffffff8; m_stackTop0 = (stackBottom + defaultStackSize) & 0xffffff8;
m_tss.esp = m_stackTop0; m_tss.esp = m_stackTop0;
} else { } else {
auto* region = allocateRegion(defaultStackSize, "stack"); if (fork_parent) {
ASSERT(region); m_stackTop3 = fork_parent->m_stackTop3;
m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8; } else {
m_tss.esp = m_stackTop3; auto* region = allocateRegion(defaultStackSize, "stack");
ASSERT(region);
m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
m_tss.esp = m_stackTop3;
}
} }
if (isRing3()) { if (isRing3()) {
@ -511,12 +576,6 @@ Process::~Process()
ProcFileSystem::the().removeProcess(*this); ProcFileSystem::the().removeProcess(*this);
system.nprocess--; system.nprocess--;
if (isRing3()) {
delete [] m_ldtEntries;
m_ldtEntries = nullptr;
gdt_free_entry(m_ldt_selector);
}
gdt_free_entry(selector()); gdt_free_entry(selector());
if (m_kernelStack) { if (m_kernelStack) {

View file

@ -119,6 +119,7 @@ public:
int sys$uname(utsname*); int sys$uname(utsname*);
int sys$readlink(const char*, char*, size_t); int sys$readlink(const char*, char*, size_t);
int sys$ttyname_r(int fd, char*, size_t); int sys$ttyname_r(int fd, char*, size_t);
pid_t sys$fork(RegisterDump&);
static void initialize(); static void initialize();
@ -155,11 +156,13 @@ public:
void send_signal(int signal, Process* sender); void send_signal(int signal, Process* sender);
Process* fork(RegisterDump&);
private: private:
friend class MemoryManager; friend class MemoryManager;
friend bool scheduleNewProcess(); friend bool scheduleNewProcess();
Process(String&& name, uid_t, gid_t, pid_t parentPID, RingLevel, RetainPtr<VirtualFileSystem::Node>&& cwd = nullptr, RetainPtr<VirtualFileSystem::Node>&& executable = nullptr, TTY* = nullptr); Process(String&& name, uid_t, gid_t, pid_t parentPID, RingLevel, RetainPtr<VirtualFileSystem::Node>&& cwd = nullptr, RetainPtr<VirtualFileSystem::Node>&& executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr);
void allocateLDT(); void allocateLDT();
@ -183,8 +186,6 @@ private:
State m_state { Invalid }; State m_state { Invalid };
DWORD m_wakeupTime { 0 }; DWORD m_wakeupTime { 0 };
TSS32 m_tss; TSS32 m_tss;
word m_ldt_selector { 0 };
Descriptor* m_ldtEntries { nullptr };
Vector<OwnPtr<FileHandle>> m_file_descriptors; Vector<OwnPtr<FileHandle>> m_file_descriptors;
RingLevel m_ring { Ring0 }; RingLevel m_ring { Ring0 };
int m_error { 0 }; int m_error { 0 };

View file

@ -43,7 +43,7 @@ void initialize()
kprintf("syscall: int 0x80 handler installed\n"); kprintf("syscall: int 0x80 handler installed\n");
} }
DWORD handle(DWORD function, DWORD arg1, DWORD arg2, DWORD arg3) static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2, DWORD arg3)
{ {
ASSERT_INTERRUPTS_ENABLED(); ASSERT_INTERRUPTS_ENABLED();
switch (function) { switch (function) {
@ -128,6 +128,8 @@ DWORD handle(DWORD function, DWORD arg1, DWORD arg2, DWORD arg3)
return current->sys$tcgetpgrp((int)arg1); return current->sys$tcgetpgrp((int)arg1);
case Syscall::PosixTcsetpgrp: case Syscall::PosixTcsetpgrp:
return current->sys$tcsetpgrp((int)arg1, (pid_t)arg2); return current->sys$tcsetpgrp((int)arg1, (pid_t)arg2);
case Syscall::PosixFork:
return current->sys$fork(regs);
default: default:
kprintf("<%u> int0x80: Unknown function %x requested {%x, %x, %x}\n", current->pid(), function, arg1, arg2, arg3); kprintf("<%u> int0x80: Unknown function %x requested {%x, %x, %x}\n", current->pid(), function, arg1, arg2, arg3);
break; break;
@ -143,5 +145,6 @@ void syscall_entry(RegisterDump& regs)
DWORD arg1 = regs.edx; DWORD arg1 = regs.edx;
DWORD arg2 = regs.ecx; DWORD arg2 = regs.ecx;
DWORD arg3 = regs.ebx; DWORD arg3 = regs.ebx;
regs.eax = Syscall::handle(function, arg1, arg2, arg3); regs.eax = Syscall::handle(regs, function, arg1, arg2, arg3);
} }

View file

@ -47,6 +47,7 @@ enum Function {
PosixGetpgrp = 0x2015, PosixGetpgrp = 0x2015,
PosixTcsetpgrp = 0x2016, PosixTcsetpgrp = 0x2016,
PosixTcgetpgrp = 0x2017, PosixTcgetpgrp = 0x2017,
PosixFork = 0x2018,
}; };
void initialize(); void initialize();

View file

@ -26,6 +26,8 @@ typedef struct
#define ETERNAL_BASE_PHYSICAL 0x200000 #define ETERNAL_BASE_PHYSICAL 0x200000
#define BASE_PHYS 0x100000 #define BASE_PHYS 0x100000
#define RANGE_SIZE 0x100000
PRIVATE BYTE alloc_map[POOL_SIZE / CHUNK_SIZE / 8]; PRIVATE BYTE alloc_map[POOL_SIZE / CHUNK_SIZE / 8];
volatile DWORD sum_alloc = 0; volatile DWORD sum_alloc = 0;
@ -36,6 +38,9 @@ volatile size_t kmalloc_sum_page_aligned = 0;
static byte* s_next_eternal_ptr; static byte* s_next_eternal_ptr;
static byte* s_next_page_aligned_ptr; static byte* s_next_page_aligned_ptr;
static byte* s_end_of_eternal_range;
static byte* s_end_of_page_aligned_range;
bool is_kmalloc_address(void* ptr) bool is_kmalloc_address(void* ptr)
{ {
if (ptr >= (byte*)ETERNAL_BASE_PHYSICAL && ptr < s_next_eternal_ptr) if (ptr >= (byte*)ETERNAL_BASE_PHYSICAL && ptr < s_next_eternal_ptr)
@ -58,12 +63,16 @@ kmalloc_init()
s_next_eternal_ptr = (byte*)ETERNAL_BASE_PHYSICAL; s_next_eternal_ptr = (byte*)ETERNAL_BASE_PHYSICAL;
s_next_page_aligned_ptr = (byte*)PAGE_ALIGNED_BASE_PHYSICAL; s_next_page_aligned_ptr = (byte*)PAGE_ALIGNED_BASE_PHYSICAL;
s_end_of_eternal_range = s_next_eternal_ptr + RANGE_SIZE;
s_end_of_page_aligned_range = s_next_page_aligned_ptr + RANGE_SIZE;
} }
void* kmalloc_eternal(size_t size) void* kmalloc_eternal(size_t size)
{ {
void* ptr = s_next_eternal_ptr; void* ptr = s_next_eternal_ptr;
s_next_eternal_ptr += size; s_next_eternal_ptr += size;
ASSERT(s_next_eternal_ptr < s_end_of_eternal_range);
kmalloc_sum_eternal += size; kmalloc_sum_eternal += size;
return ptr; return ptr;
} }
@ -73,6 +82,7 @@ void* kmalloc_page_aligned(size_t size)
ASSERT((size % 4096) == 0); ASSERT((size % 4096) == 0);
void* ptr = s_next_page_aligned_ptr; void* ptr = s_next_page_aligned_ptr;
s_next_page_aligned_ptr += size; s_next_page_aligned_ptr += size;
ASSERT(s_next_page_aligned_ptr < s_end_of_page_aligned_range);
kmalloc_sum_page_aligned += size; kmalloc_sum_page_aligned += size;
return ptr; return ptr;
} }

View file

@ -18,6 +18,7 @@ cp ../Userland/cat mnt/bin/cat
cp ../Userland/uname mnt/bin/uname cp ../Userland/uname mnt/bin/uname
cp ../Userland/clear mnt/bin/clear cp ../Userland/clear mnt/bin/clear
cp ../Userland/tst mnt/bin/tst cp ../Userland/tst mnt/bin/tst
cp ../Userland/ft mnt/bin/ft
cp ../Userland/mm mnt/bin/mm cp ../Userland/mm mnt/bin/mm
cp ../Userland/kill mnt/bin/kill cp ../Userland/kill mnt/bin/kill
cp ../Userland/tty mnt/bin/tty cp ../Userland/tty mnt/bin/tty

View file

@ -5,6 +5,11 @@
extern "C" { extern "C" {
// Userspace fork(): issues the PosixFork syscall and hands its result back
// to the caller unchanged.
pid_t fork()
{
    pid_t syscall_result = Syscall::invoke(Syscall::PosixFork);
    return syscall_result;
}
uid_t getuid() uid_t getuid()
{ {
return Syscall::invoke(Syscall::PosixGetuid); return Syscall::invoke(Syscall::PosixGetuid);

View file

@ -8,6 +8,7 @@ __BEGIN_DECLS
extern char** environ; extern char** environ;
inline int getpagesize() { return 4096; } inline int getpagesize() { return 4096; }
pid_t fork();
pid_t getsid(pid_t); pid_t getsid(pid_t);
pid_t setsid(); pid_t setsid();
int setpgid(pid_t pid, pid_t pgid); int setpgid(pid_t pid, pid_t pgid);

1
Userland/.gitignore vendored
View file

@ -16,3 +16,4 @@ tst
mm mm
kill kill
tty tty
ft

View file

@ -14,6 +14,7 @@ OBJS = \
tst.o \ tst.o \
mm.o \ mm.o \
kill.o \ kill.o \
ft.o \
tty.o tty.o
APPS = \ APPS = \
@ -32,6 +33,7 @@ APPS = \
tst \ tst \
mm \ mm \
kill \ kill \
ft \
tty tty
ARCH_FLAGS = ARCH_FLAGS =
@ -91,6 +93,9 @@ clear: clear.o
tst: tst.o tst: tst.o
$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a $(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a
ft: ft.o
$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a
mm: mm.o mm: mm.o
$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a $(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a

14
Userland/ft.cpp Normal file
View file

@ -0,0 +1,14 @@
#include <stdio.h>
#include <unistd.h>
// Small fork() smoke test: forks once and prints which side of the fork
// each resulting process ended up on.
//
// Returns 0 on success, or 1 if fork() reported failure.
int main(int argc, char** argv)
{
    // The command line is not used by this test.
    (void)argc;
    (void)argv;

    printf("Testing fork()...\n");
    pid_t pid = fork();
    if (pid < 0) {
        // Without this check a failed fork would be reported as a parent
        // with "child pid=-1" and the program would exit successfully.
        printf("fork() failed\n");
        return 1;
    }
    if (pid == 0) {
        printf("child, pid=%d\n", getpid());
    } else {
        printf("parent, child pid=%d\n", pid);
    }
    return 0;
}

View file

@ -32,6 +32,13 @@ static int sh_pwd(int, const char**)
return 0; return 0;
} }
// Shell builtin "fork": forks the shell and has every resulting process
// print its own pid next to the value fork() returned to it.
static int sh_fork(int, const char**)
{
    pid_t fork_result = fork();
    pid_t own_pid = getpid();
    printf("getpid()=%d, fork()=%d\n", own_pid, fork_result);
    return 0;
}
static int sh_exit(int, const char**) static int sh_exit(int, const char**)
{ {
printf("Good-bye!\n"); printf("Good-bye!\n");
@ -94,6 +101,11 @@ static bool handle_builtin(int argc, const char** argv, int& retval)
retval = sh_exit(argc, argv); retval = sh_exit(argc, argv);
return true; return true;
} }
if (!strcmp(argv[0], "fork")) {
retval = sh_fork(argc, argv);
return true;
}
return false; return false;
} }

View file

@ -15,6 +15,19 @@ FileHandle::~FileHandle()
{ {
} }
// Produces a new FileHandle that refers to the same vnode as this one and
// starts out at the same offset. In SERENITY builds the fd number and the
// blocking flag are carried over as well. Yields a null pointer if the new
// handle could not be created.
OwnPtr<FileHandle> FileHandle::clone()
{
    auto duplicate = make<FileHandle>(m_vnode.copyRef());
    if (duplicate) {
        duplicate->m_currentOffset = m_currentOffset;
#ifdef SERENITY
        duplicate->m_fd = m_fd;
        duplicate->m_isBlocking = m_isBlocking;
#endif
    }
    return duplicate;
}
#ifndef SERENITY #ifndef SERENITY
bool additionWouldOverflow(Unix::off_t a, Unix::off_t b) bool additionWouldOverflow(Unix::off_t a, Unix::off_t b)
{ {

View file

@ -11,6 +11,8 @@ public:
explicit FileHandle(RetainPtr<VirtualFileSystem::Node>&&); explicit FileHandle(RetainPtr<VirtualFileSystem::Node>&&);
~FileHandle(); ~FileHandle();
OwnPtr<FileHandle> clone();
int close(); int close();
Unix::off_t seek(Unix::off_t, int whence); Unix::off_t seek(Unix::off_t, int whence);