1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-21 15:05:07 +00:00
serenity/Kernel/API/Syscall.h
Liav A 5e062414c1 Kernel: Add support for jails
Our implementation for Jails resembles much of how FreeBSD jails are
working - it's essentially only a matter of using a RefPtr in the
Process class to a Jail object. Then, when we iterate over all processes
in various cases, we can check whether the current process is in a jail
and, if so, restrict what is visible to it in terms of PID isolation.
This also lets us expose metadata about Jails in the /sys/kernel/jails
node (which does not reveal anything to a process which is in jail).

The lifetime model for the Jail object is currently plain and simple - there's
simply no way to manually delete a Jail object once it was created. Such
feature should be carefully designed to allow safe destruction of a Jail
without the possibility of releasing a process which is in Jail from the
actual jail. Each process which is attached into a Jail cannot leave it
until the end of a Process (i.e. when finalizing a Process). All jails
are kept being referenced in the JailManagement. When a last attached
process is finalized, the Jail is automatically destroyed.
2022-11-05 18:00:58 -06:00

660 lines
21 KiB
C++

/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Types.h>
#include <AK/Userspace.h>
#include <Kernel/API/POSIX/sched.h>
// Interrupt vector number used for syscalls. The same value (0x82) is
// hard-coded in the `int $0x82` instruction on the I386 path of
// Syscall::invoke() further down in this header.
constexpr int syscall_vector = 0x82;
// Forward declarations of C types, so that the parameter structs below can
// hold pointers to them without pulling in their full definitions.
// (timeval is declared here but not referenced anywhere else in this header.)
extern "C" {
struct pollfd;
struct timeval;
struct timespec;
struct sockaddr;
struct siginfo;
struct stat;
struct statvfs;
// Unlike the structs above, socklen_t is a complete type: it is used both by
// value and through pointers in the socket-related parameter structs.
typedef u32 socklen_t;
}
namespace Kernel {
// Second field of every ENUMERATE_SYSCALLS entry: records whether the syscall
// is declared as needing the big process lock (see the NOTE above that macro).
enum class NeedsBigProcessLock {
Yes,
No
};
// Declare all syscalls and associated metadata.
//
// Each entry has the form S(name, NeedsBigProcessLock::Yes|No). Further down
// in this header the list is expanded to generate the SC_<name> enumerators of
// Syscall::Function and to re-export them with using-declarations. The
// enumerators carry no explicit values, so an entry's position in this list
// determines its numeric value: inserting, removing or reordering entries
// renumbers every enumerator that follows.
//
// NOTE: When declaring a new syscall or modifying an existing, please
// ensure that the proper assert is present at the top of the syscall
// implementation to both verify and document to any readers if the
// syscall acquires the big process lock or not. The asserts are:
// - VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
// - VERIFY_NO_PROCESS_BIG_LOCK(this)
//
#define ENUMERATE_SYSCALLS(S) \
S(accept4, NeedsBigProcessLock::No) \
S(access, NeedsBigProcessLock::Yes) \
S(adjtime, NeedsBigProcessLock::No) \
S(alarm, NeedsBigProcessLock::Yes) \
S(allocate_tls, NeedsBigProcessLock::Yes) \
S(anon_create, NeedsBigProcessLock::No) \
S(beep, NeedsBigProcessLock::No) \
S(bind, NeedsBigProcessLock::No) \
S(chdir, NeedsBigProcessLock::No) \
S(chmod, NeedsBigProcessLock::No) \
S(chown, NeedsBigProcessLock::No) \
S(clock_gettime, NeedsBigProcessLock::No) \
S(clock_nanosleep, NeedsBigProcessLock::No) \
S(clock_getres, NeedsBigProcessLock::No) \
S(clock_settime, NeedsBigProcessLock::No) \
S(close, NeedsBigProcessLock::No) \
S(connect, NeedsBigProcessLock::No) \
S(create_inode_watcher, NeedsBigProcessLock::Yes) \
S(create_thread, NeedsBigProcessLock::Yes) \
S(dbgputstr, NeedsBigProcessLock::No) \
S(detach_thread, NeedsBigProcessLock::Yes) \
S(disown, NeedsBigProcessLock::Yes) \
S(dump_backtrace, NeedsBigProcessLock::No) \
S(dup2, NeedsBigProcessLock::No) \
S(emuctl, NeedsBigProcessLock::No) \
S(execve, NeedsBigProcessLock::Yes) \
S(exit, NeedsBigProcessLock::Yes) \
S(exit_thread, NeedsBigProcessLock::Yes) \
S(fchdir, NeedsBigProcessLock::No) \
S(fchmod, NeedsBigProcessLock::No) \
S(fchown, NeedsBigProcessLock::No) \
S(fcntl, NeedsBigProcessLock::Yes) \
S(fork, NeedsBigProcessLock::Yes) \
S(fstat, NeedsBigProcessLock::No) \
S(fstatvfs, NeedsBigProcessLock::No) \
S(fsync, NeedsBigProcessLock::No) \
S(ftruncate, NeedsBigProcessLock::No) \
S(futex, NeedsBigProcessLock::Yes) \
S(get_dir_entries, NeedsBigProcessLock::Yes) \
S(get_process_name, NeedsBigProcessLock::Yes) \
S(get_stack_bounds, NeedsBigProcessLock::No) \
S(get_thread_name, NeedsBigProcessLock::Yes) \
S(getcwd, NeedsBigProcessLock::No) \
S(getegid, NeedsBigProcessLock::No) \
S(geteuid, NeedsBigProcessLock::No) \
S(getgid, NeedsBigProcessLock::No) \
S(getgroups, NeedsBigProcessLock::No) \
S(gethostname, NeedsBigProcessLock::No) \
S(getkeymap, NeedsBigProcessLock::No) \
S(getpeername, NeedsBigProcessLock::Yes) \
S(getpgid, NeedsBigProcessLock::Yes) \
S(getpgrp, NeedsBigProcessLock::Yes) \
S(getpid, NeedsBigProcessLock::No) \
S(getppid, NeedsBigProcessLock::No) \
S(getrandom, NeedsBigProcessLock::No) \
S(getresgid, NeedsBigProcessLock::No) \
S(getresuid, NeedsBigProcessLock::No) \
S(getrusage, NeedsBigProcessLock::Yes) \
S(getsid, NeedsBigProcessLock::Yes) \
S(getsockname, NeedsBigProcessLock::Yes) \
S(getsockopt, NeedsBigProcessLock::No) \
S(gettid, NeedsBigProcessLock::No) \
S(getuid, NeedsBigProcessLock::No) \
S(inode_watcher_add_watch, NeedsBigProcessLock::Yes) \
S(inode_watcher_remove_watch, NeedsBigProcessLock::Yes) \
S(ioctl, NeedsBigProcessLock::Yes) \
S(join_thread, NeedsBigProcessLock::Yes) \
S(jail_create, NeedsBigProcessLock::No) \
S(jail_attach, NeedsBigProcessLock::No) \
S(kill, NeedsBigProcessLock::Yes) \
S(kill_thread, NeedsBigProcessLock::Yes) \
S(killpg, NeedsBigProcessLock::Yes) \
S(link, NeedsBigProcessLock::No) \
S(listen, NeedsBigProcessLock::No) \
S(lseek, NeedsBigProcessLock::No) \
S(madvise, NeedsBigProcessLock::Yes) \
S(map_time_page, NeedsBigProcessLock::Yes) \
S(mkdir, NeedsBigProcessLock::No) \
S(mknod, NeedsBigProcessLock::No) \
S(mmap, NeedsBigProcessLock::Yes) \
S(mount, NeedsBigProcessLock::Yes) \
S(mprotect, NeedsBigProcessLock::Yes) \
S(mremap, NeedsBigProcessLock::Yes) \
S(msync, NeedsBigProcessLock::Yes) \
S(msyscall, NeedsBigProcessLock::No) \
S(munmap, NeedsBigProcessLock::Yes) \
S(open, NeedsBigProcessLock::Yes) \
S(perf_event, NeedsBigProcessLock::Yes) \
S(perf_register_string, NeedsBigProcessLock::Yes) \
S(pipe, NeedsBigProcessLock::No) \
S(pledge, NeedsBigProcessLock::Yes) \
S(poll, NeedsBigProcessLock::Yes) \
S(posix_fallocate, NeedsBigProcessLock::No) \
S(prctl, NeedsBigProcessLock::Yes) \
S(profiling_disable, NeedsBigProcessLock::Yes) \
S(profiling_enable, NeedsBigProcessLock::Yes) \
S(profiling_free_buffer, NeedsBigProcessLock::Yes) \
S(ptrace, NeedsBigProcessLock::Yes) \
S(purge, NeedsBigProcessLock::Yes) \
S(read, NeedsBigProcessLock::Yes) \
S(pread, NeedsBigProcessLock::Yes) \
S(readlink, NeedsBigProcessLock::No) \
S(readv, NeedsBigProcessLock::Yes) \
S(realpath, NeedsBigProcessLock::No) \
S(recvfd, NeedsBigProcessLock::No) \
S(recvmsg, NeedsBigProcessLock::Yes) \
S(rename, NeedsBigProcessLock::No) \
S(rmdir, NeedsBigProcessLock::No) \
S(scheduler_get_parameters, NeedsBigProcessLock::No) \
S(scheduler_set_parameters, NeedsBigProcessLock::No) \
S(sendfd, NeedsBigProcessLock::No) \
S(sendmsg, NeedsBigProcessLock::Yes) \
S(set_coredump_metadata, NeedsBigProcessLock::No) \
S(set_mmap_name, NeedsBigProcessLock::Yes) \
S(set_process_name, NeedsBigProcessLock::Yes) \
S(set_thread_name, NeedsBigProcessLock::Yes) \
S(setegid, NeedsBigProcessLock::No) \
S(seteuid, NeedsBigProcessLock::No) \
S(setgid, NeedsBigProcessLock::No) \
S(setgroups, NeedsBigProcessLock::No) \
S(sethostname, NeedsBigProcessLock::No) \
S(setkeymap, NeedsBigProcessLock::No) \
S(setpgid, NeedsBigProcessLock::Yes) \
S(setresgid, NeedsBigProcessLock::No) \
S(setresuid, NeedsBigProcessLock::No) \
S(setreuid, NeedsBigProcessLock::No) \
S(setsid, NeedsBigProcessLock::Yes) \
S(setsockopt, NeedsBigProcessLock::No) \
S(setuid, NeedsBigProcessLock::No) \
S(shutdown, NeedsBigProcessLock::No) \
S(sigaction, NeedsBigProcessLock::Yes) \
S(sigaltstack, NeedsBigProcessLock::Yes) \
S(sigpending, NeedsBigProcessLock::Yes) \
S(sigprocmask, NeedsBigProcessLock::Yes) \
S(sigreturn, NeedsBigProcessLock::Yes) \
S(sigsuspend, NeedsBigProcessLock::Yes) \
S(sigtimedwait, NeedsBigProcessLock::Yes) \
S(socket, NeedsBigProcessLock::No) \
S(socketpair, NeedsBigProcessLock::No) \
S(stat, NeedsBigProcessLock::No) \
S(statvfs, NeedsBigProcessLock::No) \
S(symlink, NeedsBigProcessLock::No) \
S(sync, NeedsBigProcessLock::No) \
S(sysconf, NeedsBigProcessLock::No) \
S(times, NeedsBigProcessLock::Yes) \
S(umask, NeedsBigProcessLock::Yes) \
S(umount, NeedsBigProcessLock::Yes) \
S(uname, NeedsBigProcessLock::No) \
S(unlink, NeedsBigProcessLock::No) \
S(unveil, NeedsBigProcessLock::No) \
S(utime, NeedsBigProcessLock::No) \
S(utimensat, NeedsBigProcessLock::No) \
S(waitid, NeedsBigProcessLock::Yes) \
S(write, NeedsBigProcessLock::Yes) \
S(writev, NeedsBigProcessLock::Yes) \
S(yield, NeedsBigProcessLock::No)
namespace Syscall {
// Syscall numbers. ENUMERATE_SYSCALLS is expanded with a helper macro that
// turns each entry into an SC_<name> enumerator (the lock annotation is
// ignored here), so enumerators are numbered from 0 in list order. __Count
// comes last and therefore equals the number of declared syscalls.
enum Function {
#undef __ENUMERATE_SYSCALL
#define __ENUMERATE_SYSCALL(name, big_lock) SC_##name,
    ENUMERATE_SYSCALLS(__ENUMERATE_SYSCALL)
#undef __ENUMERATE_SYSCALL
        __Count
};
#ifdef AK_OS_SERENITY
// A string described by a pointer and an explicit length. The SC_*_params
// structs below use it for paths, names and similar text arguments.
//
// Both members have defaults so that a default-constructed StringArgument is a
// well-defined "no string" value (nullptr, 0) rather than carrying an
// indeterminate pointer; this matches MutableBufferArgument and
// StringListArgument, which also default their pointers. The type remains an
// aggregate, so `StringArgument { ptr, len }` keeps working.
struct StringArgument {
    char const* characters { nullptr };
    size_t length { 0 };
};
// A writable buffer described by a data pointer and a size. The element type
// and the integer type of the size are both chosen by the user of the
// template; a default-constructed instance is (nullptr, 0).
template<typename Element, typename Length>
struct MutableBufferArgument {
    Element* data { nullptr };
    Length size { 0 };
};
// An array of StringArgument entries described by a pointer to the first
// entry and an entry count. A default-constructed instance is (nullptr, 0).
struct StringListArgument {
    StringArgument* strings { nullptr };
    size_t length { 0 };
};
// Parameter block named after the `mmap` entry of ENUMERATE_SYSCALLS
// (presumably what that syscall receives; the consumer is not in this header).
// prot/flags/fd/offset use fixed-width integer types; `name` is a
// pointer+length string.
struct SC_mmap_params {
void* addr;
size_t size;
size_t alignment;
int32_t prot;
int32_t flags;
int32_t fd;
int64_t offset;
StringArgument name;
};
// Parameter block named after the `mremap` entry of ENUMERATE_SYSCALLS:
// an existing address with its old size, the requested new size, and flags.
struct SC_mremap_params {
void* old_address;
size_t old_size;
size_t new_size;
int32_t flags;
};
// Parameter block named after the `open` entry of ENUMERATE_SYSCALLS.
// `path` is a pointer+length string; `dirfd` presumably anchors relative
// paths, as in openat() -- confirm against the handler.
struct SC_open_params {
int dirfd;
StringArgument path;
int options;
u16 mode;
};
// Parameter block named after the `poll` entry of ENUMERATE_SYSCALLS:
// an array of `nfds` pollfd records plus a read-only timeout and a read-only
// signal mask, both passed by pointer.
struct SC_poll_params {
    struct pollfd* fds;
    unsigned nfds;
    struct timespec const* timeout;
    u32 const* sigmask;
};
// Parameter block named after the `clock_nanosleep` entry of
// ENUMERATE_SYSCALLS: clock selector, flags, a read-only requested duration
// and a writable slot for the remaining time.
struct SC_clock_nanosleep_params {
    int clock_id;
    int flags;
    struct timespec const* requested_sleep;
    struct timespec* remaining_sleep;
};
// Parameter block named after the `clock_getres` entry of ENUMERATE_SYSCALLS:
// a clock selector and a writable timespec for the result.
struct SC_clock_getres_params {
int clock_id;
struct timespec* result;
};
// Parameter block named after the `accept4` entry of ENUMERATE_SYSCALLS.
// Note the field order: the two pointers come before the socket fd and flags.
struct SC_accept4_params {
sockaddr* addr;
socklen_t* addrlen;
int sockfd;
int flags;
};
// Parameter block named after the `getsockopt` entry of ENUMERATE_SYSCALLS.
// Both `value` and `value_size` are writable pointers (compare
// SC_setsockopt_params, where the value is const and the size is by value).
struct SC_getsockopt_params {
int sockfd;
int level;
int option;
void* value;
socklen_t* value_size;
};
// Parameter block named after the `setsockopt` entry of ENUMERATE_SYSCALLS.
// The option value is read-only and its size is passed by value.
struct SC_setsockopt_params {
void const* value;
int sockfd;
int level;
int option;
socklen_t value_size;
};
// Parameter block named after the `getsockname` entry of ENUMERATE_SYSCALLS.
// Same layout as SC_getpeername_params.
struct SC_getsockname_params {
int sockfd;
sockaddr* addr;
socklen_t* addrlen;
};
// Parameter block named after the `getpeername` entry of ENUMERATE_SYSCALLS.
// Same layout as SC_getsockname_params.
struct SC_getpeername_params {
int sockfd;
sockaddr* addr;
socklen_t* addrlen;
};
// Parameter block named after the `socketpair` entry of ENUMERATE_SYSCALLS.
// `sv` is a writable int pointer; presumably it receives the two resulting
// descriptors as in POSIX socketpair() -- confirm against the handler.
struct SC_socketpair_params {
int domain;
int type;
int protocol;
int* sv;
};
// Parameter block named after the `futex` entry of ENUMERATE_SYSCALLS.
struct SC_futex_params {
u32* userspace_address;
int futex_op;
u32 val;
// The anonymous union makes one slot hold either a timeout pointer or an
// integer `val2`. Which member is meaningful presumably depends on
// `futex_op` -- confirm against the handler.
union {
timespec const* timeout;
uintptr_t val2;
};
u32* userspace_address2;
u32 val3;
};
// Parameter block named after the `setkeymap` entry of ENUMERATE_SYSCALLS:
// five read-only u32 tables (plain, shift, alt, altgr, shift+altgr) and the
// map name as a pointer+length string. The table lengths are not carried in
// this struct.
struct SC_setkeymap_params {
u32 const* map;
u32 const* shift_map;
u32 const* alt_map;
u32 const* altgr_map;
u32 const* shift_altgr_map;
StringArgument map_name;
};
// Parameter block named after the `jail_create` entry of ENUMERATE_SYSCALLS:
// a 64-bit jail index and the jail name as a pointer+length string.
// NOTE(review): whether `index` is an input, an output, or both cannot be told
// from this header -- confirm against the handler.
struct SC_jail_create_params {
u64 index;
StringArgument name;
};
// Parameter block named after the `jail_attach` entry of ENUMERATE_SYSCALLS:
// just the 64-bit index of a jail.
struct SC_jail_attach_params {
u64 index;
};
// Parameter block named after the `getkeymap` entry of ENUMERATE_SYSCALLS.
// Mirrors SC_setkeymap_params, but every table pointer is writable and the
// map name is a writable char buffer (pointer + size_t size) instead of a
// string.
struct SC_getkeymap_params {
u32* map;
u32* shift_map;
u32* alt_map;
u32* altgr_map;
u32* shift_altgr_map;
MutableBufferArgument<char, size_t> map_name;
};
// Parameter block named after the `create_thread` entry of ENUMERATE_SYSCALLS.
// Every attribute field carries an in-struct default, so a default-constructed
// object describes a thread with default attributes.
struct SC_create_thread_params {
    unsigned int detach_state = 0; // JOINABLE or DETACHED
    int schedule_priority = 30;    // THREAD_PRIORITY_NORMAL
    // FIXME: Implement guard pages in create_thread (unreadable pages at "overflow" end of stack)
    // "If an implementation rounds up the value of guardsize to a multiple of {PAGESIZE},
    // a call to pthread_attr_getguardsize() specifying attr shall store in the guardsize
    // parameter the guard size specified by the previous pthread_attr_setguardsize() function call"
    // ... ok, if you say so posix. Guess we get to lie to people about guard page size
    unsigned int guard_page_size = 0;          // Rounded up to PAGE_SIZE
    unsigned int reported_guard_page_size = 0; // The lie we tell callers
    unsigned int stack_size = 1 * MiB;         // Default PTHREAD_STACK_MIN
    // Defaulted explicitly: nullptr is the documented "any address" value, and
    // without an initializer a default-initialized object (`SC_create_thread_params p;`)
    // would carry an indeterminate pointer here.
    void* stack_location = nullptr; // nullptr means any, o.w. process virtual address
#    if ARCH(X86_64)
    // Register-named slots present only on x86_64; they have no defaults and
    // must be set by whoever fills in this struct.
    FlatPtr rdi;
    FlatPtr rsi;
    FlatPtr rcx;
    FlatPtr rdx;
#    endif
};
// Parameter block named after the `realpath` entry of ENUMERATE_SYSCALLS:
// an input path (pointer+length) and a writable char buffer.
// Same layout as SC_readlink_params.
struct SC_realpath_params {
StringArgument path;
MutableBufferArgument<char, size_t> buffer;
};
// Parameter block named after the `set_mmap_name` entry of
// ENUMERATE_SYSCALLS: an address, a size, and a name as a pointer+length string.
struct SC_set_mmap_name_params {
void* addr;
size_t size;
StringArgument name;
};
// Parameter block named after the `execve` entry of ENUMERATE_SYSCALLS:
// the path plus two string lists (arguments and environment), each given as
// a pointer to StringArgument entries and an entry count.
struct SC_execve_params {
StringArgument path;
StringListArgument arguments;
StringListArgument environment;
};
// Parameter block named after the `readlink` entry of ENUMERATE_SYSCALLS:
// an input path (pointer+length) and a writable char buffer.
// Same layout as SC_realpath_params.
struct SC_readlink_params {
StringArgument path;
MutableBufferArgument<char, size_t> buffer;
};
// Parameter block named after the `link` entry of ENUMERATE_SYSCALLS:
// two pointer+length path strings. Same layout as SC_rename_params.
struct SC_link_params {
StringArgument old_path;
StringArgument new_path;
};
// Parameter block named after the `chown` entry of ENUMERATE_SYSCALLS.
// uid/gid are 32-bit; `follow_symlinks` is an int, presumably used as a
// boolean -- confirm against the handler.
struct SC_chown_params {
StringArgument path;
u32 uid;
u32 gid;
int dirfd;
int follow_symlinks;
};
// Parameter block named after the `mknod` entry of ENUMERATE_SYSCALLS:
// a path string, a 16-bit mode and a 32-bit device number.
struct SC_mknod_params {
StringArgument path;
u16 mode;
u32 dev;
};
// Parameter block named after the `symlink` entry of ENUMERATE_SYSCALLS:
// the link target and the path of the link itself, both pointer+length strings.
struct SC_symlink_params {
StringArgument target;
StringArgument linkpath;
};
// Parameter block named after the `rename` entry of ENUMERATE_SYSCALLS:
// two pointer+length path strings. Same layout as SC_link_params.
struct SC_rename_params {
StringArgument old_path;
StringArgument new_path;
};
// Parameter block named after the `mount` entry of ENUMERATE_SYSCALLS.
// The mount source is identified by a file descriptor (`source_fd`), not by
// a path; target and filesystem type are pointer+length strings.
struct SC_mount_params {
StringArgument target;
StringArgument fs_type;
int source_fd;
int flags;
};
// Parameter block named after the `pledge` entry of ENUMERATE_SYSCALLS:
// two pointer+length strings, `promises` and `execpromises`.
struct SC_pledge_params {
StringArgument promises;
StringArgument execpromises;
};
// Parameter block named after the `unveil` entry of ENUMERATE_SYSCALLS:
// a path and a permissions string, both pointer+length.
struct SC_unveil_params {
StringArgument path;
StringArgument permissions;
};
// Parameter block named after the `utimensat` entry of ENUMERATE_SYSCALLS.
// `times` is a read-only timespec pointer; how many timespec values it refers
// to is not expressed in this struct.
struct SC_utimensat_params {
int dirfd;
StringArgument path;
struct timespec const* times;
int flag;
};
// Parameter block named after the `waitid` entry of ENUMERATE_SYSCALLS:
// id type, id, a writable siginfo pointer and options.
struct SC_waitid_params {
int idtype;
int id;
struct siginfo* infop;
int options;
};
// Parameter block named after the `stat` entry of ENUMERATE_SYSCALLS:
// a path string, a writable `struct stat` pointer, a directory fd and a
// `follow_symlinks` int (presumably a boolean -- confirm against the handler).
struct SC_stat_params {
StringArgument path;
struct stat* statbuf;
int dirfd;
int follow_symlinks;
};
// Wraps a single writable byte buffer (u8 pointer + size_t size).
// NOTE(review): no ENUMERATE_SYSCALLS entry is called `ptrace_buf`; presumably
// this is an auxiliary descriptor used together with `ptrace` -- confirm.
struct SC_ptrace_buf_params {
MutableBufferArgument<u8, size_t> buf;
};
// Parameter block named after the `ptrace` entry of ENUMERATE_SYSCALLS:
// request code, a thread id, an address and a pointer-sized `data` word.
struct SC_ptrace_params {
int request;
pid_t tid;
void* addr;
FlatPtr data;
};
// Parameter block named after the `set_coredump_metadata` entry of
// ENUMERATE_SYSCALLS: one key/value pair, both pointer+length strings.
struct SC_set_coredump_metadata_params {
StringArgument key;
StringArgument value;
};
// Parameter block named after the `inode_watcher_add_watch` entry of
// ENUMERATE_SYSCALLS: a path string, a file descriptor and a 32-bit event mask.
// NOTE(review): `fd` is presumably the inode-watcher descriptor -- confirm.
struct SC_inode_watcher_add_watch_params {
StringArgument user_path;
int fd;
u32 event_mask;
};
// Parameter block named after the `statvfs` entry of ENUMERATE_SYSCALLS:
// a path string and a writable `struct statvfs` pointer.
struct SC_statvfs_params {
StringArgument path;
struct statvfs* buf;
};
// Parameter block named after the `chmod` entry of ENUMERATE_SYSCALLS:
// directory fd, path string, 16-bit mode and a `follow_symlinks` int
// (presumably a boolean -- confirm against the handler).
struct SC_chmod_params {
int dirfd;
StringArgument path;
u16 mode;
int follow_symlinks;
};
// Two-valued selector with `bool` as its underlying type (Process == false,
// Thread == true). It is stored in SC_scheduler_parameters_params next to a
// field called `pid_or_tid`, so it presumably says how that id is to be
// interpreted -- confirm against the handler.
enum class SchedulerParametersMode : bool {
Process,
Thread,
};
// Holds an id, the mode selector and a `struct sched_param` by value.
// NOTE(review): no single ENUMERATE_SYSCALLS entry has this exact name;
// presumably it is shared by `scheduler_get_parameters` and
// `scheduler_set_parameters` -- confirm.
struct SC_scheduler_parameters_params {
pid_t pid_or_tid;
SchedulerParametersMode mode;
struct sched_param parameters;
};
// Declarations only; neither function is defined in this header.
void initialize();
int sync();
# if ARCH(I386) || ARCH(X86_64) || ARCH(AARCH64)
// Issues the syscall numbered `function` with no arguments and returns the
// raw value left in the result register (eax/rax on x86, x0 on AArch64).
// The value is returned as-is; any interpretation is up to the caller.
inline uintptr_t invoke(Function function)
{
uintptr_t result;
# if ARCH(I386)
// "a" places the syscall number in eax and "=a" reads the result back from it.
asm volatile("int $0x82"
: "=a"(result)
: "a"(function)
: "memory");
# elif ARCH(X86_64)
// rcx and r11 are declared clobbered because the `syscall` instruction
// overwrites them.
asm volatile("syscall"
: "=a"(result)
: "a"(function)
: "rcx", "r11", "memory");
# elif ARCH(AARCH64)
// Local register variables pin the syscall number to x8 and the result to x0.
register uintptr_t x0 asm("x0");
register uintptr_t x8 asm("x8") = function;
asm volatile("svc #0"
: "=r"(x0)
: "r"(x8)
: "memory");
result = x0;
# endif
return result;
}
// Issues the syscall numbered `function` with one argument and returns the
// raw value left in the result register (eax/rax on x86, x0 on AArch64).
//
// The argument may be any type convertible to uintptr_t with a cast (for
// example a pointer); every architecture branch casts it explicitly.
template<typename T1>
inline uintptr_t invoke(Function function, T1 arg1)
{
    uintptr_t result;
#    if ARCH(I386)
    // Syscall number in eax, arg1 in edx.
    asm volatile("int $0x82"
                 : "=a"(result)
                 : "a"(function), "d"((uintptr_t)arg1)
                 : "memory");
#    elif ARCH(X86_64)
    // Syscall number in rax, arg1 in rdx. `syscall` overwrites rcx and r11.
    asm volatile("syscall"
                 : "=a"(result)
                 : "a"(function), "d"((uintptr_t)arg1)
                 : "rcx", "r11", "memory");
#    else
    // AArch64 (the only other architecture admitted by the enclosing #if).
    // The cast matches the x86 branches; without it a pointer-typed T1 would
    // not convert to uintptr_t and the template would fail to instantiate.
    register uintptr_t x0 asm("x0");
    register uintptr_t x1 asm("x1") = (uintptr_t)arg1;
    register uintptr_t x8 asm("x8") = function;
    asm volatile("svc #0"
                 : "=r"(x0)
                 : "r"(x1), "r"(x8)
                 : "memory");
    result = x0;
#    endif
    return result;
}
// Issues the syscall numbered `function` with two arguments and returns the
// raw value left in the result register (eax/rax on x86, x0 on AArch64).
//
// Each argument may be any type convertible to uintptr_t with a cast (for
// example a pointer); every architecture branch casts explicitly.
template<typename T1, typename T2>
inline uintptr_t invoke(Function function, T1 arg1, T2 arg2)
{
    uintptr_t result;
#    if ARCH(I386)
    // Syscall number in eax, arguments in edx, ecx.
    asm volatile("int $0x82"
                 : "=a"(result)
                 : "a"(function), "d"((uintptr_t)arg1), "c"((uintptr_t)arg2)
                 : "memory");
#    elif ARCH(X86_64)
    // Syscall number in rax, arguments in rdx, rdi. rcx cannot carry an
    // argument here because `syscall` overwrites rcx and r11.
    asm volatile("syscall"
                 : "=a"(result)
                 : "a"(function), "d"((uintptr_t)arg1), "D"((uintptr_t)arg2)
                 : "rcx", "r11", "memory");
#    else
    // AArch64 (the only other architecture admitted by the enclosing #if).
    // The casts match the x86 branches; without them a pointer-typed argument
    // would not convert to uintptr_t and the template would fail to instantiate.
    register uintptr_t x0 asm("x0");
    register uintptr_t x1 asm("x1") = (uintptr_t)arg1;
    register uintptr_t x2 asm("x2") = (uintptr_t)arg2;
    register uintptr_t x8 asm("x8") = function;
    asm volatile("svc #0"
                 : "=r"(x0)
                 : "r"(x1), "r"(x2), "r"(x8)
                 : "memory");
    result = x0;
#    endif
    return result;
}
// Issues the syscall numbered `function` with three arguments and returns the
// raw value left in the result register (eax/rax on x86, x0 on AArch64).
//
// Each argument may be any type convertible to uintptr_t with a cast (for
// example a pointer); every architecture branch casts explicitly.
template<typename T1, typename T2, typename T3>
inline uintptr_t invoke(Function function, T1 arg1, T2 arg2, T3 arg3)
{
    uintptr_t result;
#    if ARCH(I386)
    // Syscall number in eax, arguments in edx, ecx, ebx.
    asm volatile("int $0x82"
                 : "=a"(result)
                 : "a"(function), "d"((uintptr_t)arg1), "c"((uintptr_t)arg2), "b"((uintptr_t)arg3)
                 : "memory");
#    elif ARCH(X86_64)
    // Syscall number in rax, arguments in rdx, rdi, rbx. `syscall` overwrites
    // rcx and r11, hence the clobbers.
    asm volatile("syscall"
                 : "=a"(result)
                 : "a"(function), "d"((uintptr_t)arg1), "D"((uintptr_t)arg2), "b"((uintptr_t)arg3)
                 : "rcx", "r11", "memory");
#    else
    // AArch64 (the only other architecture admitted by the enclosing #if).
    // The casts match the x86 branches; without them a pointer-typed argument
    // would not convert to uintptr_t and the template would fail to instantiate.
    register uintptr_t x0 asm("x0");
    register uintptr_t x1 asm("x1") = (uintptr_t)arg1;
    register uintptr_t x2 asm("x2") = (uintptr_t)arg2;
    register uintptr_t x3 asm("x3") = (uintptr_t)arg3;
    register uintptr_t x8 asm("x8") = function;
    asm volatile("svc #0"
                 : "=r"(x0)
                 : "r"(x1), "r"(x2), "r"(x3), "r"(x8)
                 : "memory");
    result = x0;
#    endif
    return result;
}
// Issues the syscall numbered `function` with four arguments and returns the
// raw value left in the result register (eax/rax on x86, x0 on AArch64).
//
// Each argument may be any type convertible to uintptr_t with a cast (for
// example a pointer); every architecture branch casts explicitly.
template<typename T1, typename T2, typename T3, typename T4>
inline uintptr_t invoke(Function function, T1 arg1, T2 arg2, T3 arg3, T4 arg4)
{
    uintptr_t result;
#    if ARCH(I386)
    // Syscall number in eax, arguments in edx, ecx, ebx, esi.
    asm volatile("int $0x82"
                 : "=a"(result)
                 : "a"(function), "d"((uintptr_t)arg1), "c"((uintptr_t)arg2), "b"((uintptr_t)arg3), "S"((uintptr_t)arg4)
                 : "memory");
#    elif ARCH(X86_64)
    // Syscall number in rax, arguments in rdx, rdi, rbx, rsi.
    // `syscall` overwrites rcx and r11, so both must be listed as clobbered,
    // exactly as in the other overloads; otherwise the compiler may keep a
    // live value in one of them across the instruction.
    asm volatile("syscall"
                 : "=a"(result)
                 : "a"(function), "d"((uintptr_t)arg1), "D"((uintptr_t)arg2), "b"((uintptr_t)arg3), "S"((uintptr_t)arg4)
                 : "rcx", "r11", "memory");
#    else
    // AArch64 (the only other architecture admitted by the enclosing #if).
    // The casts match the x86 branches; without them a pointer-typed argument
    // would not convert to uintptr_t and the template would fail to instantiate.
    register uintptr_t x0 asm("x0");
    register uintptr_t x1 asm("x1") = (uintptr_t)arg1;
    register uintptr_t x2 asm("x2") = (uintptr_t)arg2;
    register uintptr_t x3 asm("x3") = (uintptr_t)arg3;
    register uintptr_t x4 asm("x4") = (uintptr_t)arg4;
    register uintptr_t x8 asm("x8") = function;
    asm volatile("svc #0"
                 : "=r"(x0)
                 : "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x8)
                 : "memory");
    result = x0;
#    endif
    return result;
}
# endif
#endif
}
// Re-export every SC_<name> enumerator from namespace Syscall into the
// enclosing namespace, one using-declaration per ENUMERATE_SYSCALLS entry
// (the lock annotation is ignored here).
#undef __ENUMERATE_SYSCALL
#define __ENUMERATE_SYSCALL(name, big_lock) using Syscall::SC_##name;
ENUMERATE_SYSCALLS(__ENUMERATE_SYSCALL)
#undef __ENUMERATE_SYSCALL
}
// NOTE(review): a using-directive at header scope makes every name in
// namespace Kernel visible to each file that includes this header. Presumably
// this is deliberate so that includers can write Syscall::... and SC_...
// unqualified -- confirm before changing, since removing it would break them.
using namespace Kernel;