From 23a7ccf607d2be901604534e1a79c3c8a3124097 Mon Sep 17 00:00:00 2001 From: Liav A Date: Thu, 15 Dec 2022 11:42:40 +0200 Subject: [PATCH] Kernel+LibCore+LibC: Split the mount syscall into multiple syscalls This is preparation for creating a usable mechanism for filesystem-specific mount flags. To keep some compatibility with userland code, the LibC and LibCore mount functions remain usable, but instead of doing one "atomic" syscall, they now do multiple syscalls to perform the complete procedure of mounting a filesystem. The FileBackedFileSystem IntrusiveList in the VFS code is now protected by a Mutex, because when we mount a new filesystem, we need to check whether a filesystem has already been created for a given source_fd, so we scan that list for the matching OpenFileDescription. If we fail to find an already-created filesystem, we create a new one and register it in the list once it has been mounted successfully. We use a Mutex because we might need to initiate disk access during the filesystem creation, which will take other mutexes in other parts of the kernel, which makes it impossible to hold a spinlock while doing this. 
--- Base/usr/share/man/man2/pledge.md | 1 + Kernel/API/FileSystem/MountSpecificFlags.h | 30 +++ Kernel/API/Ioctl.h | 2 + Kernel/API/Syscall.h | 23 +- Kernel/CMakeLists.txt | 1 + Kernel/Devices/Storage/StorageManagement.cpp | 4 +- Kernel/FileSystem/DevPtsFS/FileSystem.cpp | 2 +- Kernel/FileSystem/DevPtsFS/FileSystem.h | 2 +- Kernel/FileSystem/Ext2FS/FileSystem.cpp | 2 +- Kernel/FileSystem/Ext2FS/FileSystem.h | 2 +- Kernel/FileSystem/FATFS/FileSystem.cpp | 2 +- Kernel/FileSystem/FATFS/FileSystem.h | 2 +- Kernel/FileSystem/File.h | 1 + Kernel/FileSystem/ISO9660FS/FileSystem.cpp | 2 +- Kernel/FileSystem/ISO9660FS/FileSystem.h | 2 +- Kernel/FileSystem/Initializer.h | 32 +++ Kernel/FileSystem/MountFile.cpp | 122 ++++++++++ Kernel/FileSystem/MountFile.h | 43 ++++ Kernel/FileSystem/OpenFileDescription.cpp | 20 ++ Kernel/FileSystem/OpenFileDescription.h | 4 + Kernel/FileSystem/Plan9FS/FileSystem.cpp | 2 +- Kernel/FileSystem/Plan9FS/FileSystem.h | 2 +- Kernel/FileSystem/ProcFS/FileSystem.cpp | 2 +- Kernel/FileSystem/ProcFS/FileSystem.h | 2 +- Kernel/FileSystem/RAMFS/FileSystem.cpp | 2 +- Kernel/FileSystem/RAMFS/FileSystem.h | 2 +- Kernel/FileSystem/SysFS/FileSystem.cpp | 2 +- Kernel/FileSystem/SysFS/FileSystem.h | 2 +- Kernel/FileSystem/VirtualFileSystem.cpp | 218 ++++++++++++------ Kernel/FileSystem/VirtualFileSystem.h | 20 +- Kernel/Forward.h | 1 + Kernel/Syscalls/mount.cpp | 134 ++++------- Kernel/Tasks/Process.h | 18 +- .../DevTools/UserspaceEmulator/Emulator.h | 5 +- .../UserspaceEmulator/Emulator_syscalls.cpp | 52 ++++- Userland/Libraries/LibC/unistd.cpp | 56 ++++- Userland/Libraries/LibC/unistd.h | 3 + Userland/Libraries/LibCore/System.cpp | 41 +++- Userland/Libraries/LibCore/System.h | 2 + 39 files changed, 651 insertions(+), 214 deletions(-) create mode 100644 Kernel/API/FileSystem/MountSpecificFlags.h create mode 100644 Kernel/FileSystem/Initializer.h create mode 100644 Kernel/FileSystem/MountFile.cpp create mode 100644 Kernel/FileSystem/MountFile.h diff --git 
a/Base/usr/share/man/man2/pledge.md b/Base/usr/share/man/man2/pledge.md index 72374779b1..979b06c577 100644 --- a/Base/usr/share/man/man2/pledge.md +++ b/Base/usr/share/man/man2/pledge.md @@ -54,6 +54,7 @@ If the process later attempts to use any system functionality it has previously * `ptrace`: The [`ptrace`(2)](help://man/2/ptrace) syscall (\*) * `prot_exec`: [`mmap`(2)](help://man/2/mmap) and [`mprotect`(2)](help://man/2/mprotect) with `PROT_EXEC` * `map_fixed`: [`mmap`(2)](help://man/2/mmap) with `MAP_FIXED` or `MAP_FIXED_NOREPLACE` (\*) +* `mount`: [`mount`(2)](help://man/2/mount) Various filesystem mount related syscalls (\*) * `no_error`: Ignore requests of pledge elevation going forwards, this is useful for enforcing _execpromises_ while the child process wants to ask for more upfront (Note that the elevation requests are _not_ granted, merely ignored), this is similar to the `error` pledge in OpenBSD. * `jail`: Various jail-specific syscalls (\*) diff --git a/Kernel/API/FileSystem/MountSpecificFlags.h b/Kernel/API/FileSystem/MountSpecificFlags.h new file mode 100644 index 0000000000..b08de48da6 --- /dev/null +++ b/Kernel/API/FileSystem/MountSpecificFlags.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2022-2023, Liav A. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include + +#define MOUNT_SPECIFIC_FLAG_KEY_STRING_MAX_LENGTH 64 + +#define MOUNT_SPECIFIC_FLAG_NON_ASCII_STRING_TYPE_MAX_LENGTH 64 +#define MOUNT_SPECIFIC_FLAG_ASCII_STRING_TYPE_MAX_LENGTH 1024 + +struct MountSpecificFlag { + u32 key_string_length; + u32 value_length; + + enum class ValueType : u32 { + Boolean = 0, + UnsignedInteger, + SignedInteger, + ASCIIString, + }; + + ValueType value_type; + unsigned char const* key_string_addr; + void const* value_addr; +}; diff --git a/Kernel/API/Ioctl.h b/Kernel/API/Ioctl.h index 3f8b89e322..0785a32096 100644 --- a/Kernel/API/Ioctl.h +++ b/Kernel/API/Ioctl.h @@ -108,6 +108,7 @@ enum IOCtlNumber { KEYBOARD_IOCTL_SET_NUM_LOCK, KEYBOARD_IOCTL_GET_CAPS_LOCK, KEYBOARD_IOCTL_SET_CAPS_LOCK, + MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG, SIOCATMARK, SIOCSIFADDR, SIOCGIFADDR, @@ -190,6 +191,7 @@ enum IOCtlNumber { #define FIBMAP FIBMAP #define FIONBIO FIONBIO #define FIONREAD FIONREAD +#define MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG #define SOUNDCARD_IOCTL_SET_SAMPLE_RATE SOUNDCARD_IOCTL_SET_SAMPLE_RATE #define SOUNDCARD_IOCTL_GET_SAMPLE_RATE SOUNDCARD_IOCTL_GET_SAMPLE_RATE #define STORAGE_DEVICE_GET_SIZE STORAGE_DEVICE_GET_SIZE diff --git a/Kernel/API/Syscall.h b/Kernel/API/Syscall.h index 565b5e1fe2..55df161b43 100644 --- a/Kernel/API/Syscall.h +++ b/Kernel/API/Syscall.h @@ -82,6 +82,8 @@ enum class NeedsBigProcessLock { S(fork, NeedsBigProcessLock::No) \ S(fstat, NeedsBigProcessLock::No) \ S(fstatvfs, NeedsBigProcessLock::No) \ + S(fsopen, NeedsBigProcessLock::No) \ + S(fsmount, NeedsBigProcessLock::No) \ S(fsync, NeedsBigProcessLock::No) \ S(ftruncate, NeedsBigProcessLock::No) \ S(futex, NeedsBigProcessLock::Yes) \ @@ -128,7 +130,6 @@ enum class NeedsBigProcessLock { S(mkdir, NeedsBigProcessLock::No) \ S(mknod, NeedsBigProcessLock::No) \ S(mmap, NeedsBigProcessLock::No) \ - S(mount, NeedsBigProcessLock::Yes) \ S(mprotect, 
NeedsBigProcessLock::No) \ S(mremap, NeedsBigProcessLock::No) \ S(msync, NeedsBigProcessLock::No) \ @@ -432,9 +433,19 @@ struct SC_rename_params { StringArgument new_path; }; -struct SC_mount_params { - StringArgument target; +struct SC_fsopen_params { StringArgument fs_type; + int flags; +}; + +struct SC_fsmount_params { + int mount_fd; + StringArgument target; + int source_fd; +}; + +struct SC_bindmount_params { + StringArgument target; int source_fd; int flags; }; @@ -444,12 +455,6 @@ struct SC_remount_params { int flags; }; -struct SC_bindmount_params { - StringArgument target; - int source_fd; - int flags; -}; - struct SC_pledge_params { StringArgument promises; StringArgument execpromises; diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt index 7c508ab334..ffe4fb1c19 100644 --- a/Kernel/CMakeLists.txt +++ b/Kernel/CMakeLists.txt @@ -138,6 +138,7 @@ set(KERNEL_SOURCES FileSystem/ISO9660FS/FileSystem.cpp FileSystem/ISO9660FS/Inode.cpp FileSystem/Mount.cpp + FileSystem/MountFile.cpp FileSystem/OpenFileDescription.cpp FileSystem/Plan9FS/FileSystem.cpp FileSystem/Plan9FS/Inode.cpp diff --git a/Kernel/Devices/Storage/StorageManagement.cpp b/Kernel/Devices/Storage/StorageManagement.cpp index 00d96e32d8..b535137bae 100644 --- a/Kernel/Devices/Storage/StorageManagement.cpp +++ b/Kernel/Devices/Storage/StorageManagement.cpp @@ -456,7 +456,9 @@ NonnullRefPtr StorageManagement::root_filesystem() const auto description_or_error = OpenFileDescription::try_create(boot_device_description.release_nonnull()); VERIFY(!description_or_error.is_error()); - auto file_system = Ext2FS::try_create(description_or_error.release_value()).release_value(); + Array mount_specific_data; + mount_specific_data.fill(0); + auto file_system = Ext2FS::try_create(description_or_error.release_value(), mount_specific_data.span()).release_value(); if (auto result = file_system->initialize(); result.is_error()) { dump_storage_devices_and_partitions(); diff --git 
a/Kernel/FileSystem/DevPtsFS/FileSystem.cpp b/Kernel/FileSystem/DevPtsFS/FileSystem.cpp index 304f2ae7c3..b33f969500 100644 --- a/Kernel/FileSystem/DevPtsFS/FileSystem.cpp +++ b/Kernel/FileSystem/DevPtsFS/FileSystem.cpp @@ -13,7 +13,7 @@ namespace Kernel { -ErrorOr> DevPtsFS::try_create() +ErrorOr> DevPtsFS::try_create(ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) DevPtsFS)); } diff --git a/Kernel/FileSystem/DevPtsFS/FileSystem.h b/Kernel/FileSystem/DevPtsFS/FileSystem.h index 0ee1dcf801..56dd51f7e5 100644 --- a/Kernel/FileSystem/DevPtsFS/FileSystem.h +++ b/Kernel/FileSystem/DevPtsFS/FileSystem.h @@ -20,7 +20,7 @@ class DevPtsFS final : public FileSystem { public: virtual ~DevPtsFS() override; - static ErrorOr> try_create(); + static ErrorOr> try_create(ReadonlyBytes); virtual ErrorOr initialize() override; virtual StringView class_name() const override { return "DevPtsFS"sv; } diff --git a/Kernel/FileSystem/Ext2FS/FileSystem.cpp b/Kernel/FileSystem/Ext2FS/FileSystem.cpp index 388f44d2bc..d0f3e75038 100644 --- a/Kernel/FileSystem/Ext2FS/FileSystem.cpp +++ b/Kernel/FileSystem/Ext2FS/FileSystem.cpp @@ -13,7 +13,7 @@ namespace Kernel { -ErrorOr> Ext2FS::try_create(OpenFileDescription& file_description) +ErrorOr> Ext2FS::try_create(OpenFileDescription& file_description, ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) Ext2FS(file_description))); } diff --git a/Kernel/FileSystem/Ext2FS/FileSystem.h b/Kernel/FileSystem/Ext2FS/FileSystem.h index 08c75268a7..40096e1723 100644 --- a/Kernel/FileSystem/Ext2FS/FileSystem.h +++ b/Kernel/FileSystem/Ext2FS/FileSystem.h @@ -27,7 +27,7 @@ public: FileSize64bits = 1 << 1, }; - static ErrorOr> try_create(OpenFileDescription&); + static ErrorOr> try_create(OpenFileDescription&, ReadonlyBytes); virtual ~Ext2FS() override; diff --git a/Kernel/FileSystem/FATFS/FileSystem.cpp b/Kernel/FileSystem/FATFS/FileSystem.cpp index 728738866f..99d3465dbe 100644 --- 
a/Kernel/FileSystem/FATFS/FileSystem.cpp +++ b/Kernel/FileSystem/FATFS/FileSystem.cpp @@ -10,7 +10,7 @@ namespace Kernel { -ErrorOr> FATFS::try_create(OpenFileDescription& file_description) +ErrorOr> FATFS::try_create(OpenFileDescription& file_description, ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) FATFS(file_description))); } diff --git a/Kernel/FileSystem/FATFS/FileSystem.h b/Kernel/FileSystem/FATFS/FileSystem.h index 7b0949f5c0..d02f2c96ca 100644 --- a/Kernel/FileSystem/FATFS/FileSystem.h +++ b/Kernel/FileSystem/FATFS/FileSystem.h @@ -21,7 +21,7 @@ class FATFS final : public BlockBasedFileSystem { friend FATInode; public: - static ErrorOr> try_create(OpenFileDescription&); + static ErrorOr> try_create(OpenFileDescription&, ReadonlyBytes); virtual ~FATFS() override = default; virtual StringView class_name() const override { return "FATFS"sv; } diff --git a/Kernel/FileSystem/File.h b/Kernel/FileSystem/File.h index f1601e8f3f..63e8c6d7a3 100644 --- a/Kernel/FileSystem/File.h +++ b/Kernel/FileSystem/File.h @@ -114,6 +114,7 @@ public: virtual bool is_character_device() const { return false; } virtual bool is_socket() const { return false; } virtual bool is_inode_watcher() const { return false; } + virtual bool is_mount_file() const { return false; } virtual bool is_regular_file() const { return false; } diff --git a/Kernel/FileSystem/ISO9660FS/FileSystem.cpp b/Kernel/FileSystem/ISO9660FS/FileSystem.cpp index c95d35f64f..e7b17617f2 100644 --- a/Kernel/FileSystem/ISO9660FS/FileSystem.cpp +++ b/Kernel/FileSystem/ISO9660FS/FileSystem.cpp @@ -16,7 +16,7 @@ constexpr u32 first_data_area_block = 16; constexpr u32 logical_sector_size = 2048; constexpr u32 max_cached_directory_entries = 128; -ErrorOr> ISO9660FS::try_create(OpenFileDescription& description) +ErrorOr> ISO9660FS::try_create(OpenFileDescription& description, ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) ISO9660FS(description))); } diff --git 
a/Kernel/FileSystem/ISO9660FS/FileSystem.h b/Kernel/FileSystem/ISO9660FS/FileSystem.h index ce99465ccb..3b3842573b 100644 --- a/Kernel/FileSystem/ISO9660FS/FileSystem.h +++ b/Kernel/FileSystem/ISO9660FS/FileSystem.h @@ -29,7 +29,7 @@ class ISO9660FS final : public BlockBasedFileSystem { friend ISO9660DirectoryIterator; public: - static ErrorOr> try_create(OpenFileDescription&); + static ErrorOr> try_create(OpenFileDescription&, ReadonlyBytes); virtual ~ISO9660FS() override; virtual StringView class_name() const override { return "ISO9660FS"sv; } diff --git a/Kernel/FileSystem/Initializer.h b/Kernel/FileSystem/Initializer.h new file mode 100644 index 0000000000..e1eb094be5 --- /dev/null +++ b/Kernel/FileSystem/Initializer.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2022, Liav A. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace Kernel { + +struct FileSystemInitializer { + StringView short_name; + StringView name; + bool requires_open_file_description { false }; + bool requires_block_device { false }; + bool requires_seekable_file { false }; + ErrorOr> (*create_with_fd)(OpenFileDescription&, ReadonlyBytes) = nullptr; + ErrorOr> (*create)(ReadonlyBytes) = nullptr; + ErrorOr (*handle_mount_boolean_flag)(Span, StringView key, bool) = nullptr; + ErrorOr (*handle_mount_unsigned_integer_flag)(Span, StringView key, u64) = nullptr; + ErrorOr (*handle_mount_signed_integer_flag)(Span, StringView key, i64) = nullptr; + ErrorOr (*handle_mount_ascii_string_flag)(Span, StringView key, StringView value) = nullptr; +}; + +} diff --git a/Kernel/FileSystem/MountFile.cpp b/Kernel/FileSystem/MountFile.cpp new file mode 100644 index 0000000000..b2b6d81e6f --- /dev/null +++ b/Kernel/FileSystem/MountFile.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2022-2023, Liav A. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Kernel { + +ErrorOr> MountFile::create(FileSystemInitializer const& file_system_initializer, int flags) +{ + // NOTE: We should not open a MountFile if someone wants to either remount or bindmount. + // There's a check for this in the fsopen syscall entry handler, but here we just assert + // to ensure this never happens. + VERIFY(!(flags & MS_BIND)); + VERIFY(!(flags & MS_REMOUNT)); + auto mount_specific_data_buffer = TRY(KBuffer::try_create_with_size("Mount Specific Data"sv, PAGE_SIZE, Memory::Region::Access::ReadWrite, AllocationStrategy::AllocateNow)); + return TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) MountFile(file_system_initializer, flags, move(mount_specific_data_buffer)))); +} + +MountFile::MountFile(FileSystemInitializer const& file_system_initializer, int flags, NonnullOwnPtr mount_specific_data) + : m_flags(flags) + , m_file_system_initializer(file_system_initializer) +{ + m_mount_specific_data.with_exclusive([&](auto& our_mount_specific_data) { + our_mount_specific_data = move(mount_specific_data); + memset(our_mount_specific_data->data(), 0, our_mount_specific_data->size()); + }); +} + +MountFile::~MountFile() = default; + +ErrorOr MountFile::ioctl(OpenFileDescription&, unsigned request, Userspace arg) +{ + return m_mount_specific_data.with_exclusive([&](auto& our_mount_specific_data) -> ErrorOr { + switch (request) { + case MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG: { + auto user_mount_specific_data = static_ptr_cast(arg); + auto mount_specific_data = TRY(copy_typed_from_user(user_mount_specific_data)); + if ((mount_specific_data.value_type == MountSpecificFlag::ValueType::SignedInteger || mount_specific_data.value_type == MountSpecificFlag::ValueType::UnsignedInteger) && mount_specific_data.value_length != 8) + return EDOM; + if 
(mount_specific_data.key_string_length > MOUNT_SPECIFIC_FLAG_KEY_STRING_MAX_LENGTH) + return ENAMETOOLONG; + if (mount_specific_data.value_type != MountSpecificFlag::ValueType::Boolean && mount_specific_data.value_length == 0) + return EINVAL; + if (mount_specific_data.value_type != MountSpecificFlag::ValueType::Boolean && mount_specific_data.value_addr == nullptr) + return EFAULT; + + // NOTE: We put these limits in place because we assume that we don't need to handle huge + // amounts of bytes when trying to handle a mount fs-specific flag. + // Anything larger than these constants (which could be changed if needed) is deemed to + // potentially cause an OOM condition, and cannot represent any reasonable and "honest" data + // from userspace. + if (mount_specific_data.value_type != MountSpecificFlag::ValueType::ASCIIString && mount_specific_data.value_length > MOUNT_SPECIFIC_FLAG_NON_ASCII_STRING_TYPE_MAX_LENGTH) + return E2BIG; + if (mount_specific_data.value_type == MountSpecificFlag::ValueType::ASCIIString && mount_specific_data.value_length > MOUNT_SPECIFIC_FLAG_ASCII_STRING_TYPE_MAX_LENGTH) + return E2BIG; + + // NOTE: We enforce that the passed argument will be either i64 or u64, so it will always be + // exactly 8 bytes. We do that to simplify handling of integers as well as to ensure ABI correctness + // in all possible cases. + auto key_string = TRY(try_copy_kstring_from_user(reinterpret_cast(mount_specific_data.key_string_addr), static_cast(mount_specific_data.key_string_length))); + switch (mount_specific_data.value_type) { + // NOTE: This is actually treated as a simple boolean flag. + case MountSpecificFlag::ValueType::Boolean: { + VERIFY(m_file_system_initializer.handle_mount_boolean_flag); + Userspace user_value_addr(reinterpret_cast(mount_specific_data.value_addr)); + auto value_integer = TRY(copy_typed_from_user(user_value_addr)); + if (value_integer != 0 && value_integer != 1) + return EDOM; + bool value = (value_integer == 1) ? 
true : false; + TRY(m_file_system_initializer.handle_mount_boolean_flag(our_mount_specific_data->bytes(), key_string->view(), value)); + return {}; + } + case MountSpecificFlag::ValueType::UnsignedInteger: { + VERIFY(m_file_system_initializer.handle_mount_unsigned_integer_flag); + Userspace user_value_addr(reinterpret_cast(mount_specific_data.value_addr)); + auto value_integer = TRY(copy_typed_from_user(user_value_addr)); + TRY(m_file_system_initializer.handle_mount_unsigned_integer_flag(our_mount_specific_data->bytes(), key_string->view(), value_integer)); + return {}; + } + case MountSpecificFlag::ValueType::SignedInteger: { + VERIFY(m_file_system_initializer.handle_mount_signed_integer_flag); + Userspace user_value_addr(reinterpret_cast(mount_specific_data.value_addr)); + auto value_integer = TRY(copy_typed_from_user(user_value_addr)); + TRY(m_file_system_initializer.handle_mount_signed_integer_flag(our_mount_specific_data->bytes(), key_string->view(), value_integer)); + return {}; + } + case MountSpecificFlag::ValueType::ASCIIString: { + VERIFY(m_file_system_initializer.handle_mount_ascii_string_flag); + auto value_string = TRY(try_copy_kstring_from_user(reinterpret_cast(mount_specific_data.value_addr), static_cast(mount_specific_data.value_length))); + TRY(m_file_system_initializer.handle_mount_ascii_string_flag(our_mount_specific_data->bytes(), key_string->view(), value_string->view())); + return {}; + } + default: + return EINVAL; + } + } + default: + return EINVAL; + } + }); +} + +ErrorOr> MountFile::pseudo_path(OpenFileDescription const&) const +{ + return KString::try_create(":mount-file:"sv); +} + +} diff --git a/Kernel/FileSystem/MountFile.h b/Kernel/FileSystem/MountFile.h new file mode 100644 index 0000000000..bc800b80bb --- /dev/null +++ b/Kernel/FileSystem/MountFile.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022, Liav A. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +namespace Kernel { + +class MountFile final : public File { +public: + static ErrorOr> create(FileSystemInitializer const&, int flags); + virtual ~MountFile() override; + + virtual bool can_read(OpenFileDescription const&, u64) const override { return true; } + virtual bool can_write(OpenFileDescription const&, u64) const override { return true; } + virtual ErrorOr read(OpenFileDescription&, u64, UserOrKernelBuffer&, size_t) override { return ENOTSUP; } + virtual ErrorOr write(OpenFileDescription&, u64, UserOrKernelBuffer const&, size_t) override { return ENOTSUP; } + virtual ErrorOr ioctl(OpenFileDescription&, unsigned request, Userspace arg) override; + virtual ErrorOr> pseudo_path(OpenFileDescription const&) const override; + virtual StringView class_name() const override { return "MountFile"sv; } + + int mount_flags() const { return m_flags; } + + MutexProtected>& mount_file_system_specific_data() { return m_mount_specific_data; } + FileSystemInitializer const& file_system_initializer() const { return m_file_system_initializer; } + +private: + virtual bool is_mount_file() const override { return true; } + + MountFile(FileSystemInitializer const&, int flags, NonnullOwnPtr); + + int const m_flags; + FileSystemInitializer const& m_file_system_initializer; + MutexProtected> m_mount_specific_data; +}; + +} diff --git a/Kernel/FileSystem/OpenFileDescription.cpp b/Kernel/FileSystem/OpenFileDescription.cpp index f69cfeb7bc..169ba8e309 100644 --- a/Kernel/FileSystem/OpenFileDescription.cpp +++ b/Kernel/FileSystem/OpenFileDescription.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -313,6 +314,25 @@ InodeWatcher* OpenFileDescription::inode_watcher() return static_cast(m_file.ptr()); } +bool OpenFileDescription::is_mount_file() const +{ + return m_file->is_mount_file(); +} + +MountFile const* OpenFileDescription::mount_file() const 
+{ + if (!is_mount_file()) + return nullptr; + return static_cast(m_file.ptr()); +} + +MountFile* OpenFileDescription::mount_file() +{ + if (!is_mount_file()) + return nullptr; + return static_cast(m_file.ptr()); +} + bool OpenFileDescription::is_master_pty() const { return m_file->is_master_pty(); diff --git a/Kernel/FileSystem/OpenFileDescription.h b/Kernel/FileSystem/OpenFileDescription.h index 1b02473c4d..7f0f7fab99 100644 --- a/Kernel/FileSystem/OpenFileDescription.h +++ b/Kernel/FileSystem/OpenFileDescription.h @@ -80,6 +80,10 @@ public: InodeWatcher const* inode_watcher() const; InodeWatcher* inode_watcher(); + bool is_mount_file() const; + MountFile const* mount_file() const; + MountFile* mount_file(); + bool is_master_pty() const; MasterPTY const* master_pty() const; MasterPTY* master_pty(); diff --git a/Kernel/FileSystem/Plan9FS/FileSystem.cpp b/Kernel/FileSystem/Plan9FS/FileSystem.cpp index feadf6d864..47be6a64ac 100644 --- a/Kernel/FileSystem/Plan9FS/FileSystem.cpp +++ b/Kernel/FileSystem/Plan9FS/FileSystem.cpp @@ -10,7 +10,7 @@ namespace Kernel { -ErrorOr> Plan9FS::try_create(OpenFileDescription& file_description) +ErrorOr> Plan9FS::try_create(OpenFileDescription& file_description, ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) Plan9FS(file_description))); } diff --git a/Kernel/FileSystem/Plan9FS/FileSystem.h b/Kernel/FileSystem/Plan9FS/FileSystem.h index 3be74cd698..f6bb5a8bb7 100644 --- a/Kernel/FileSystem/Plan9FS/FileSystem.h +++ b/Kernel/FileSystem/Plan9FS/FileSystem.h @@ -22,7 +22,7 @@ class Plan9FS final : public FileBackedFileSystem { public: virtual ~Plan9FS() override; - static ErrorOr> try_create(OpenFileDescription&); + static ErrorOr> try_create(OpenFileDescription&, ReadonlyBytes); virtual bool supports_watchers() const override { return false; } diff --git a/Kernel/FileSystem/ProcFS/FileSystem.cpp b/Kernel/FileSystem/ProcFS/FileSystem.cpp index 59ecafe179..d4cec65c1e 100644 --- 
a/Kernel/FileSystem/ProcFS/FileSystem.cpp +++ b/Kernel/FileSystem/ProcFS/FileSystem.cpp @@ -11,7 +11,7 @@ namespace Kernel { -ErrorOr> ProcFS::try_create() +ErrorOr> ProcFS::try_create(ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) ProcFS)); } diff --git a/Kernel/FileSystem/ProcFS/FileSystem.h b/Kernel/FileSystem/ProcFS/FileSystem.h index 38354d0737..143c283565 100644 --- a/Kernel/FileSystem/ProcFS/FileSystem.h +++ b/Kernel/FileSystem/ProcFS/FileSystem.h @@ -20,7 +20,7 @@ class ProcFS final : public FileSystem { public: virtual ~ProcFS() override; - static ErrorOr> try_create(); + static ErrorOr> try_create(ReadonlyBytes); virtual ErrorOr initialize() override; virtual StringView class_name() const override { return "ProcFS"sv; } diff --git a/Kernel/FileSystem/RAMFS/FileSystem.cpp b/Kernel/FileSystem/RAMFS/FileSystem.cpp index 53ae2eab94..a733da81b1 100644 --- a/Kernel/FileSystem/RAMFS/FileSystem.cpp +++ b/Kernel/FileSystem/RAMFS/FileSystem.cpp @@ -10,7 +10,7 @@ namespace Kernel { -ErrorOr> RAMFS::try_create() +ErrorOr> RAMFS::try_create(ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) RAMFS)); } diff --git a/Kernel/FileSystem/RAMFS/FileSystem.h b/Kernel/FileSystem/RAMFS/FileSystem.h index a3ba20b1cf..0c141bd131 100644 --- a/Kernel/FileSystem/RAMFS/FileSystem.h +++ b/Kernel/FileSystem/RAMFS/FileSystem.h @@ -18,7 +18,7 @@ class RAMFS final : public FileSystem { public: virtual ~RAMFS() override; - static ErrorOr> try_create(); + static ErrorOr> try_create(ReadonlyBytes); virtual ErrorOr initialize() override; virtual StringView class_name() const override { return "RAMFS"sv; } diff --git a/Kernel/FileSystem/SysFS/FileSystem.cpp b/Kernel/FileSystem/SysFS/FileSystem.cpp index 0e054bddc6..de43a10712 100644 --- a/Kernel/FileSystem/SysFS/FileSystem.cpp +++ b/Kernel/FileSystem/SysFS/FileSystem.cpp @@ -11,7 +11,7 @@ namespace Kernel { -ErrorOr> SysFS::try_create() +ErrorOr> SysFS::try_create(ReadonlyBytes) { return 
TRY(adopt_nonnull_ref_or_enomem(new (nothrow) SysFS)); } diff --git a/Kernel/FileSystem/SysFS/FileSystem.h b/Kernel/FileSystem/SysFS/FileSystem.h index 793c144052..fcaf6a8d6b 100644 --- a/Kernel/FileSystem/SysFS/FileSystem.h +++ b/Kernel/FileSystem/SysFS/FileSystem.h @@ -20,7 +20,7 @@ class SysFS final : public FileSystem { public: virtual ~SysFS() override; - static ErrorOr> try_create(); + static ErrorOr> try_create(ReadonlyBytes); virtual ErrorOr initialize() override; virtual StringView class_name() const override { return "SysFS"sv; } diff --git a/Kernel/FileSystem/VirtualFileSystem.cpp b/Kernel/FileSystem/VirtualFileSystem.cpp index 7e8e72c136..619bfe89e8 100644 --- a/Kernel/FileSystem/VirtualFileSystem.cpp +++ b/Kernel/FileSystem/VirtualFileSystem.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2018-2020, Andreas Kling + * Copyright (c) 2022-2023, Liav A. * * SPDX-License-Identifier: BSD-2-Clause */ @@ -23,11 +24,60 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include + namespace Kernel { static Singleton s_the; static constexpr int root_mount_flags = 0; +static ErrorOr handle_mount_boolean_flag_as_invalid(Span, StringView, bool) +{ + return EINVAL; +} + +static ErrorOr handle_mount_unsigned_integer_flag_as_invalid(Span, StringView, u64) +{ + return EINVAL; +} + +static ErrorOr handle_mount_signed_integer_flag_as_invalid(Span, StringView, i64) +{ + return EINVAL; +} + +static ErrorOr handle_mount_ascii_string_flag_as_invalid(Span, StringView, StringView) +{ + return EINVAL; +} + +static constexpr FileSystemInitializer s_initializers[] = { + { "proc"sv, "ProcFS"sv, false, false, false, {}, ProcFS::try_create, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "devpts"sv, "DevPtsFS"sv, false, false, false, {}, DevPtsFS::try_create, handle_mount_boolean_flag_as_invalid, 
handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "sys"sv, "SysFS"sv, false, false, false, {}, SysFS::try_create, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "ram"sv, "RAMFS"sv, false, false, false, {}, RAMFS::try_create, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "ext2"sv, "Ext2FS"sv, true, true, true, Ext2FS::try_create, {}, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "9p"sv, "Plan9FS"sv, true, true, true, Plan9FS::try_create, {}, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "iso9660"sv, "ISO9660FS"sv, true, true, true, ISO9660FS::try_create, {}, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "fat"sv, "FATFS"sv, true, true, true, FATFS::try_create, {}, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, +}; + +ErrorOr VirtualFileSystem::find_filesystem_type_initializer(StringView fs_type) +{ + for (auto& initializer_entry : s_initializers) { + if (fs_type == initializer_entry.short_name || fs_type == initializer_entry.name) + return &initializer_entry; + } + return ENODEV; +} + UNMAP_AFTER_INIT void VirtualFileSystem::initialize() { s_the.ensure_instance(); @@ -59,14 +109,14 @@ bool 
VirtualFileSystem::mount_point_exists_at_inode(InodeIdentifier inode_identi }); } -ErrorOr VirtualFileSystem::mount(FileSystem& fs, Custody& mount_point, int flags) +ErrorOr VirtualFileSystem::add_file_system_to_mount_table(FileSystem& file_system, Custody& mount_point, int flags) { - auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(fs, &mount_point, flags))); + auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(file_system, &mount_point, flags))); return m_mounts.with([&](auto& mounts) -> ErrorOr { auto& inode = mount_point.inode(); - dbgln("VirtualFileSystem: FileSystemID {}, Mounting {} at inode {} with flags {}", - fs.fsid(), - fs.class_name(), + dbgln("VirtualFileSystem: FileSystemID {} (non file-backed), Mounting {} at inode {} with flags {}", + file_system.fsid(), + file_system.class_name(), inode.identifier(), flags); if (mount_point_exists_at_inode(inode.identifier())) { @@ -84,14 +134,8 @@ ErrorOr VirtualFileSystem::mount(FileSystem& fs, Custody& mount_point, int // the FileSystem once it is no longer mounted). 
if (mounted_count == 1) { m_file_systems_list.with([&](auto& fs_list) { - fs_list.append(fs); + fs_list.append(file_system); }); - if (fs.is_file_backed()) { - auto& file_backed_fs = static_cast(fs); - m_file_backed_file_systems_list.with([&](auto& fs_list) { - fs_list.append(file_backed_fs); - }); - } } }); @@ -102,6 +146,65 @@ ErrorOr VirtualFileSystem::mount(FileSystem& fs, Custody& mount_point, int }); } +ErrorOr VirtualFileSystem::mount(MountFile& mount_file, OpenFileDescription* source_description, Custody& mount_point, int flags) +{ + auto const& file_system_initializer = mount_file.file_system_initializer(); + if (!source_description) { + if (file_system_initializer.requires_open_file_description) + return ENOTSUP; + if (!file_system_initializer.create) + return ENOTSUP; + RefPtr fs; + TRY(mount_file.mount_file_system_specific_data().with_exclusive([&](auto& mount_specific_data) -> ErrorOr { + fs = TRY(file_system_initializer.create(mount_specific_data->bytes())); + return {}; + })); + VERIFY(fs); + TRY(fs->initialize()); + TRY(add_file_system_to_mount_table(*fs, mount_point, flags)); + return {}; + } + + // NOTE: Although it might be OK to support creating filesystems + // without providing an actual file descriptor to their create() method + // because the caller of this function actually supplied a valid file descriptor, + // this will only make things complicated in the future, so we should block + // this kind of behavior. 
+ if (!file_system_initializer.requires_open_file_description) + return ENOTSUP; + + if (file_system_initializer.requires_block_device && !source_description->file().is_block_device()) + return ENOTBLK; + if (file_system_initializer.requires_seekable_file && !source_description->file().is_seekable()) { + dbgln("mount: this is not a seekable file"); + return ENODEV; + } + + // NOTE: If there's an associated file description with the filesystem, we could + // try to first find it from the VirtualFileSystem filesystem list and if it was not found, + // then create it and add it. + VERIFY(file_system_initializer.create_with_fd); + return m_file_backed_file_systems_list.with_exclusive([&](auto& list) -> ErrorOr { + RefPtr fs; + for (auto& node : list) { + if ((&node.file_description() == source_description) || (&node.file() == &source_description->file())) { + fs = node; + break; + } + } + if (!fs) { + TRY(mount_file.mount_file_system_specific_data().with_exclusive([&](auto& mount_specific_data) -> ErrorOr { + fs = TRY(file_system_initializer.create_with_fd(*source_description, mount_specific_data->bytes())); + return {}; + })); + TRY(fs->initialize()); + } + TRY(add_file_system_to_mount_table(*fs, mount_point, flags)); + list.append(static_cast(*fs)); + return {}; + }); +} + ErrorOr VirtualFileSystem::bind_mount(Custody& source, Custody& mount_point, int flags) { auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(source.inode(), mount_point, flags))); @@ -163,41 +266,42 @@ ErrorOr VirtualFileSystem::unmount(Custody& mountpoint_custody) auto custody_path = TRY(mountpoint_custody.try_serialize_absolute_path()); dbgln("VirtualFileSystem: unmount called with inode {} on mountpoint {}", guest_inode.identifier(), custody_path->view()); - return m_mounts.with([&](auto& mounts) -> ErrorOr { - for (auto& mount : mounts) { - if (&mount.guest() != &guest_inode) - continue; - auto mountpoint_path = TRY(mount.absolute_path()); - if (custody_path->view() != 
mountpoint_path->view()) - continue; - NonnullRefPtr fs = mount.guest_fs(); - TRY(fs->prepare_to_unmount()); - fs->mounted_count({}).with([&](auto& mounted_count) { - VERIFY(mounted_count > 0); - if (mounted_count == 1) { - dbgln("VirtualFileSystem: Unmounting file system {} for the last time...", fs->fsid()); - m_file_systems_list.with([&](auto& list) { - list.remove(*fs); - }); - if (fs->is_file_backed()) { - dbgln("VirtualFileSystem: Unmounting file backed file system {} for the last time...", fs->fsid()); - auto& file_backed_fs = static_cast(*fs); - m_file_backed_file_systems_list.with([&](auto& list) { - list.remove(file_backed_fs); + return m_file_backed_file_systems_list.with_exclusive([&](auto& file_backed_fs_list) -> ErrorOr { + TRY(m_mounts.with([&](auto& mounts) -> ErrorOr { + for (auto& mount : mounts) { + if (&mount.guest() != &guest_inode) + continue; + auto mountpoint_path = TRY(mount.absolute_path()); + if (custody_path->view() != mountpoint_path->view()) + continue; + NonnullRefPtr fs = mount.guest_fs(); + TRY(fs->prepare_to_unmount()); + fs->mounted_count({}).with([&](auto& mounted_count) { + VERIFY(mounted_count > 0); + if (mounted_count == 1) { + dbgln("VirtualFileSystem: Unmounting file system {} for the last time...", fs->fsid()); + m_file_systems_list.with([&](auto& list) { + list.remove(*fs); }); + if (fs->is_file_backed()) { + dbgln("VirtualFileSystem: Unmounting file backed file system {} for the last time...", fs->fsid()); + auto& file_backed_fs = static_cast(*fs); + file_backed_fs_list.remove(file_backed_fs); + } + } else { + mounted_count--; } - } else { - mounted_count--; - } - }); - dbgln("VirtualFileSystem: Unmounting file system {}...", fs->fsid()); - mount.m_vfs_list_node.remove(); - // Note: This is balanced by a `new` statement that is happening in various places before inserting the Mount object to the list. 
- delete &mount; - return {}; - } - dbgln("VirtualFileSystem: Nothing mounted on inode {}", guest_inode.identifier()); - return ENODEV; + }); + dbgln("VirtualFileSystem: Unmounting file system {}...", fs->fsid()); + mount.m_vfs_list_node.remove(); + // NOTE: This is balanced by a `new` statement that is happening in various places before inserting the Mount object to the list. + delete &mount; + return {}; + } + dbgln("VirtualFileSystem: Nothing mounted on inode {}", guest_inode.identifier()); + return ENODEV; + })); + return {}; }); } @@ -219,7 +323,7 @@ ErrorOr VirtualFileSystem::mount_root(FileSystem& fs) if (fs.is_file_backed()) { auto pseudo_path = TRY(static_cast(fs).file_description().pseudo_path()); dmesgln("VirtualFileSystem: mounted root({}) from {} ({})", fs.fsid(), fs.class_name(), pseudo_path); - m_file_backed_file_systems_list.with([&](auto& list) { + m_file_backed_file_systems_list.with_exclusive([&](auto& list) { list.append(static_cast(fs)); }); } else { @@ -339,28 +443,6 @@ ErrorOr VirtualFileSystem::lookup_metadata(Credentials const& cre return custody->inode().metadata(); } -ErrorOr> VirtualFileSystem::find_already_existing_or_create_file_backed_file_system(OpenFileDescription& description, Function>(OpenFileDescription&)> callback) -{ - return TRY(m_file_backed_file_systems_list.with([&](auto& list) -> ErrorOr> { - for (auto& node : list) { - if (&node.file_description() == &description) { - return node; - } - if (&node.file() == &description.file()) { - return node; - } - } - auto fs = TRY(callback(description)); - - // The created FileSystem is only added to the file_systems_lists - // when the FS has been successfully initialized and mounted - // (in VirtualFileSystem::mount()). This prevents file systems which - // fail to initialize or mount from existing in the list when the - // FileSystem is destroyed after failure. 
- return static_ptr_cast(fs); - })); -} - ErrorOr> VirtualFileSystem::open(Credentials const& credentials, StringView path, int options, mode_t mode, Custody& base, Optional owner) { return open(Process::current(), credentials, path, options, mode, base, owner); diff --git a/Kernel/FileSystem/VirtualFileSystem.h b/Kernel/FileSystem/VirtualFileSystem.h index f23d5088bf..7491c8a82d 100644 --- a/Kernel/FileSystem/VirtualFileSystem.h +++ b/Kernel/FileSystem/VirtualFileSystem.h @@ -14,11 +14,14 @@ #include #include #include +#include #include #include #include +#include #include #include +#include #include namespace Kernel { @@ -49,11 +52,13 @@ public: static void initialize(); static VirtualFileSystem& the(); + static ErrorOr find_filesystem_type_initializer(StringView fs_type); + VirtualFileSystem(); ~VirtualFileSystem(); ErrorOr mount_root(FileSystem&); - ErrorOr mount(FileSystem&, Custody& mount_point, int flags); + ErrorOr mount(MountFile&, OpenFileDescription*, Custody& mount_point, int flags); ErrorOr bind_mount(Custody& source, Custody& mount_point, int flags); ErrorOr remount(Custody& mount_point, int new_flags); ErrorOr unmount(Custody& mount_point); @@ -82,8 +87,6 @@ public: ErrorOr for_each_mount(Function(Mount const&)>) const; - ErrorOr> find_already_existing_or_create_file_backed_file_system(OpenFileDescription& description, Function>(OpenFileDescription&)> callback); - InodeIdentifier root_inode_id() const; void sync_filesystems(); @@ -105,6 +108,8 @@ private: ErrorOr validate_path_against_process_veil(Custody const& path, int options); ErrorOr validate_path_against_process_veil(StringView path, int options); + ErrorOr add_file_system_to_mount_table(FileSystem& file_system, Custody& mount_point, int flags); + bool is_vfs_root(InodeIdentifier) const; ErrorOr traverse_directory_inode(Inode&, Function(FileSystem::DirectoryEntryView const&)>); @@ -120,7 +125,14 @@ private: SpinlockProtected, LockRank::None> m_root_custody {}; SpinlockProtected, 
LockRank::None> m_mounts {}; - SpinlockProtected, LockRank::None> m_file_backed_file_systems_list {}; + + // NOTE: The FileBackedFileSystem list is protected by a mutex because we need to scan it + // to search for existing filesystems for already used block devices and therefore when doing + // that we could fail to find a filesystem so we need to create a new filesystem which might + // need to do disk access (i.e. taking Mutexes in other places) and then register that new filesystem + // in this list, to avoid TOCTOU bugs. + MutexProtected> m_file_backed_file_systems_list {}; + SpinlockProtected, LockRank::FileSystem> m_file_systems_list {}; }; diff --git a/Kernel/Forward.h b/Kernel/Forward.h index e12681f812..8d3f7f6c9d 100644 --- a/Kernel/Forward.h +++ b/Kernel/Forward.h @@ -31,6 +31,7 @@ class IPv4Socket; class Inode; class InodeIdentifier; class InodeWatcher; +class MountFile; class Jail; class KBuffer; class KString; diff --git a/Kernel/Syscalls/mount.cpp b/Kernel/Syscalls/mount.cpp index ae4fedc979..80fbd32367 100644 --- a/Kernel/Syscalls/mount.cpp +++ b/Kernel/Syscalls/mount.cpp @@ -1,120 +1,68 @@ /* * Copyright (c) 2018-2020, Andreas Kling + * Copyright (c) 2022, Liav A. 
* * SPDX-License-Identifier: BSD-2-Clause */ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include namespace Kernel { -struct FileSystemInitializer { - StringView short_name; - StringView name; - bool requires_open_file_description { false }; - bool requires_block_device { false }; - bool requires_seekable_file { false }; - ErrorOr> (*create_with_fd)(OpenFileDescription&) = nullptr; - ErrorOr> (*create)(void) = nullptr; -}; - -static constexpr FileSystemInitializer s_initializers[] = { - { "proc"sv, "ProcFS"sv, false, false, false, {}, ProcFS::try_create }, - { "devpts"sv, "DevPtsFS"sv, false, false, false, {}, DevPtsFS::try_create }, - { "sys"sv, "SysFS"sv, false, false, false, {}, SysFS::try_create }, - { "ram"sv, "RAMFS"sv, false, false, false, {}, RAMFS::try_create }, - { "ext2"sv, "Ext2FS"sv, true, true, true, Ext2FS::try_create, {} }, - { "9p"sv, "Plan9FS"sv, true, true, true, Plan9FS::try_create, {} }, - { "iso9660"sv, "ISO9660FS"sv, true, true, true, ISO9660FS::try_create, {} }, - { "fat"sv, "FATFS"sv, true, true, true, FATFS::try_create, {} }, -}; - -static ErrorOr> find_or_create_filesystem_instance(StringView fs_type, OpenFileDescription* possible_description) +ErrorOr Process::sys$fsopen(Userspace user_params) { - for (auto& initializer_entry : s_initializers) { - if (fs_type != initializer_entry.short_name && fs_type != initializer_entry.name) - continue; - if (!initializer_entry.requires_open_file_description) { - VERIFY(initializer_entry.create); - NonnullRefPtr fs = TRY(initializer_entry.create()); - return fs; - } - // Note: If there's an associated file description with the filesystem, we could - // try to first find it from the VirtualFileSystem filesystem list and if it was not found, - // then create it and add it. 
- VERIFY(initializer_entry.create_with_fd); - if (!possible_description) - return EBADF; - OpenFileDescription& description = *possible_description; - - if (initializer_entry.requires_block_device && !description.file().is_block_device()) - return ENOTBLK; - if (initializer_entry.requires_seekable_file && !description.file().is_seekable()) { - dbgln("mount: this is not a seekable file"); - return ENODEV; - } - return TRY(VirtualFileSystem::the().find_already_existing_or_create_file_backed_file_system(description, initializer_entry.create_with_fd)); - } - return ENODEV; -} - -ErrorOr Process::sys$mount(Userspace user_params) -{ - VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this); - TRY(require_no_promises()); + VERIFY_NO_PROCESS_BIG_LOCK(this); + TRY(require_promise(Pledge::mount)); auto credentials = this->credentials(); if (!credentials->is_superuser()) - return EPERM; + return Error::from_errno(EPERM); + auto params = TRY(copy_typed_from_user(user_params)); + auto fs_type_string = TRY(try_copy_kstring_from_user(params.fs_type)); + + // NOTE: If some userspace program uses MS_REMOUNT, return EINVAL to indicate that we never want this + // flag to appear in the mount table... 
+ if (params.flags & MS_REMOUNT || params.flags & MS_BIND) + return Error::from_errno(EINVAL); + + auto const* fs_type_initializer = TRY(VirtualFileSystem::find_filesystem_type_initializer(fs_type_string->view())); + VERIFY(fs_type_initializer); + auto mount_file = TRY(MountFile::create(*fs_type_initializer, params.flags)); + auto description = TRY(OpenFileDescription::try_create(move(mount_file))); + return m_fds.with_exclusive([&](auto& fds) -> ErrorOr { + auto new_fd = TRY(fds.allocate()); + fds[new_fd.fd].set(move(description), FD_CLOEXEC); + return new_fd.fd; + }); +} + +ErrorOr Process::sys$fsmount(Userspace user_params) +{ + VERIFY_NO_PROCESS_BIG_LOCK(this); + TRY(require_promise(Pledge::mount)); + auto credentials = this->credentials(); + if (!credentials->is_superuser()) + return Error::from_errno(EPERM); auto params = TRY(copy_typed_from_user(user_params)); - if (params.flags & MS_REMOUNT) - return EINVAL; - if (params.flags & MS_BIND) - return EINVAL; + auto mount_description = TRY(open_file_description(params.mount_fd)); + if (!mount_description->is_mount_file()) + return Error::from_errno(EINVAL); - auto source_fd = params.source_fd; + RefPtr source_description = TRY(open_file_description_ignoring_negative(params.source_fd)); auto target = TRY(try_copy_kstring_from_user(params.target)); - auto fs_type_string = TRY(try_copy_kstring_from_user(params.fs_type)); - auto fs_type = fs_type_string->view(); - - auto description_or_error = open_file_description(source_fd); - if (!description_or_error.is_error()) - dbgln("mount {}: source fd {} @ {}", fs_type, source_fd, target); - else - dbgln("mount {} @ {}", fs_type, target); - auto target_custody = TRY(VirtualFileSystem::the().resolve_path(credentials, target->view(), current_directory())); - - RefPtr fs; - - if (!description_or_error.is_error()) { - auto description = description_or_error.release_value(); - fs = TRY(find_or_create_filesystem_instance(fs_type, description.ptr())); - auto source_pseudo_path = 
TRY(description->pseudo_path()); - dbgln("mount: attempting to mount {} on {}", source_pseudo_path, target); - } else { - fs = TRY(find_or_create_filesystem_instance(fs_type, {})); - } - - TRY(fs->initialize()); - TRY(VirtualFileSystem::the().mount(*fs, target_custody, params.flags)); + auto flags = mount_description->mount_file()->mount_flags(); + TRY(VirtualFileSystem::the().mount(*mount_description->mount_file(), source_description.ptr(), target_custody, flags)); return 0; } ErrorOr Process::sys$remount(Userspace user_params) { VERIFY_NO_PROCESS_BIG_LOCK(this); - TRY(require_no_promises()); + TRY(require_promise(Pledge::mount)); auto credentials = this->credentials(); if (!credentials->is_superuser()) return EPERM; @@ -134,7 +82,7 @@ ErrorOr Process::sys$remount(Userspace Process::sys$bindmount(Userspace user_params) { VERIFY_NO_PROCESS_BIG_LOCK(this); - TRY(require_no_promises()); + TRY(require_promise(Pledge::mount)); auto credentials = this->credentials(); if (!credentials->is_superuser()) return EPERM; @@ -166,7 +114,7 @@ ErrorOr Process::sys$umount(Userspace user_mountpoint, siz if (!credentials->is_superuser()) return EPERM; - TRY(require_no_promises()); + TRY(require_promise(Pledge::mount)); auto mountpoint = TRY(get_syscall_path_argument(user_mountpoint, mountpoint_length)); auto custody = TRY(VirtualFileSystem::the().resolve_path(credentials, mountpoint->view(), current_directory())); diff --git a/Kernel/Tasks/Process.h b/Kernel/Tasks/Process.h index a7b86535c8..97b43bb3ce 100644 --- a/Kernel/Tasks/Process.h +++ b/Kernel/Tasks/Process.h @@ -70,6 +70,7 @@ UnixDateTime kgettimeofday(); __ENUMERATE_PLEDGE_PROMISE(map_fixed) \ __ENUMERATE_PLEDGE_PROMISE(getkeymap) \ __ENUMERATE_PLEDGE_PROMISE(jail) \ + __ENUMERATE_PLEDGE_PROMISE(mount) \ __ENUMERATE_PLEDGE_PROMISE(no_error) enum class Pledge : u32 { @@ -395,7 +396,8 @@ public: ErrorOr sys$unlink(int dirfd, Userspace pathname, size_t path_length, int flags); ErrorOr sys$symlink(Userspace); ErrorOr 
sys$rmdir(Userspace pathname, size_t path_length); - ErrorOr sys$mount(Userspace); + ErrorOr sys$fsmount(Userspace); + ErrorOr sys$fsopen(Userspace); ErrorOr sys$umount(Userspace mountpoint, size_t mountpoint_length); ErrorOr sys$chmod(Userspace); ErrorOr sys$fchmod(int fd, mode_t); @@ -826,11 +828,25 @@ public: return m_fds.with_shared([fd](auto& fds) { return fds.open_file_description(fd); }); } + ErrorOr> open_file_description_ignoring_negative(int fd) + { + if (fd < 0) + return nullptr; + return open_file_description(fd); + } + ErrorOr> open_file_description(int fd) const { return m_fds.with_shared([fd](auto& fds) { return fds.open_file_description(fd); }); } + ErrorOr> open_file_description_ignoring_negative(int fd) const + { + if (fd < 0) + return nullptr; + return open_file_description(fd); + } + ErrorOr allocate_fd() { return m_fds.with_exclusive([](auto& fds) { return fds.allocate(); }); diff --git a/Userland/DevTools/UserspaceEmulator/Emulator.h b/Userland/DevTools/UserspaceEmulator/Emulator.h index 404468cff8..adb1fc7e44 100644 --- a/Userland/DevTools/UserspaceEmulator/Emulator.h +++ b/Userland/DevTools/UserspaceEmulator/Emulator.h @@ -147,6 +147,7 @@ private: int virt$anon_create(size_t, int); int virt$beep(); int virt$bind(int sockfd, FlatPtr address, socklen_t address_length); + u32 virt$bindmount(u32 params_addr); int virt$chdir(FlatPtr, size_t); int virt$chmod(FlatPtr); int virt$chown(FlatPtr); @@ -167,6 +168,8 @@ private: int virt$fchown(int, uid_t, gid_t); u32 virt$fcntl(int fd, int, u32); int virt$fork(); + u32 virt$fsopen(u32); + u32 virt$fsmount(u32); int virt$fstat(int, FlatPtr); int virt$ftruncate(int fd, FlatPtr length_addr); int virt$futex(FlatPtr); @@ -199,7 +202,6 @@ private: u32 virt$madvise(FlatPtr, size_t, int); int virt$mkdir(FlatPtr path, size_t path_length, mode_t mode); u32 virt$mmap(u32); - u32 virt$mount(u32); u32 virt$mprotect(FlatPtr, size_t, int); FlatPtr virt$mremap(FlatPtr); int virt$annotate_mapping(FlatPtr); @@ -219,6 
+221,7 @@ private: int virt$recvfd(int, int); int virt$recvmsg(int sockfd, FlatPtr msg_addr, int flags); int virt$rename(FlatPtr address); + u32 virt$remount(u32); int virt$rmdir(FlatPtr path, size_t path_length); int virt$scheduler_get_parameters(FlatPtr); int virt$scheduler_set_parameters(FlatPtr); diff --git a/Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp b/Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp index fba7caef92..f92f240167 100644 --- a/Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp +++ b/Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp @@ -51,6 +51,8 @@ u32 Emulator::virt_syscall(u32 function, u32 arg1, u32 arg2, u32 arg3) return virt$beep(); case SC_bind: return virt$bind(arg1, arg2, arg3); + case SC_bindmount: + return virt$bindmount(arg1); case SC_chdir: return virt$chdir(arg1, arg2); case SC_chmod: @@ -94,6 +96,10 @@ u32 Emulator::virt_syscall(u32 function, u32 arg1, u32 arg2, u32 arg3) return virt$fork(); case SC_fstat: return virt$fstat(arg1, arg2); + case SC_fsmount: + return virt$fsmount(arg1); + case SC_fsopen: + return virt$fsopen(arg1); case SC_ftruncate: return virt$ftruncate(arg1, arg2); case SC_futex: @@ -158,8 +164,6 @@ u32 Emulator::virt_syscall(u32 function, u32 arg1, u32 arg2, u32 arg3) return virt$mkdir(arg1, arg2, arg3); case SC_mmap: return virt$mmap(arg1); - case SC_mount: - return virt$mount(arg1); case SC_mprotect: return virt$mprotect(arg1, arg2, arg3); case SC_mremap: @@ -196,6 +200,8 @@ u32 Emulator::virt_syscall(u32 function, u32 arg1, u32 arg2, u32 arg3) return virt$recvmsg(arg1, arg2, arg3); case SC_rename: return virt$rename(arg1); + case SC_remount: + return virt$remount(arg1); case SC_rmdir: return virt$rmdir(arg1, arg2); case SC_scheduler_get_parameters: @@ -906,18 +912,48 @@ FlatPtr Emulator::virt$mremap(FlatPtr params_addr) return -EINVAL; } -u32 Emulator::virt$mount(u32 params_addr) +u32 Emulator::virt$bindmount(u32 params_addr) { - Syscall::SC_mount_params params; + 
Syscall::SC_bindmount_params params; mmu().copy_from_vm(&params, params_addr, sizeof(params)); auto target = mmu().copy_buffer_from_vm((FlatPtr)params.target.characters, params.target.length); - auto fs_path = mmu().copy_buffer_from_vm((FlatPtr)params.fs_type.characters, params.fs_type.length); - params.fs_type.characters = (char*)fs_path.data(); - params.fs_type.length = fs_path.size(); params.target.characters = (char*)target.data(); params.target.length = target.size(); - return syscall(SC_mount, &params); + return syscall(SC_bindmount, &params); +} + +u32 Emulator::virt$remount(u32 params_addr) +{ + Syscall::SC_remount_params params; + mmu().copy_from_vm(&params, params_addr, sizeof(params)); + auto target = mmu().copy_buffer_from_vm((FlatPtr)params.target.characters, params.target.length); + params.target.characters = (char*)target.data(); + params.target.length = target.size(); + + return syscall(SC_remount, &params); +} + +u32 Emulator::virt$fsopen(u32 params_addr) +{ + Syscall::SC_fsopen_params params; + mmu().copy_from_vm(&params, params_addr, sizeof(params)); + auto fs_type = mmu().copy_buffer_from_vm((FlatPtr)params.fs_type.characters, params.fs_type.length); + params.fs_type.characters = (char*)fs_type.data(); + params.fs_type.length = fs_type.size(); + + return syscall(SC_fsopen, &params); +} + +u32 Emulator::virt$fsmount(u32 params_addr) +{ + Syscall::SC_fsmount_params params; + mmu().copy_from_vm(&params, params_addr, sizeof(params)); + auto target = mmu().copy_buffer_from_vm((FlatPtr)params.target.characters, params.target.length); + params.target.characters = (char*)target.data(); + params.target.length = target.size(); + + return syscall(SC_fsmount, &params); } u32 Emulator::virt$gettid() diff --git a/Userland/Libraries/LibC/unistd.cpp b/Userland/Libraries/LibC/unistd.cpp index 12864f6a57..beff6b4127 100644 --- a/Userland/Libraries/LibC/unistd.cpp +++ b/Userland/Libraries/LibC/unistd.cpp @@ -940,23 +940,70 @@ int fsync(int fd) __RETURN_WITH_ERRNO(rc, rc, -1); } -int mount(int 
source_fd, char const* target, char const* fs_type, int flags) +int fsopen(char const* fs_type, int flags) { - if (!target || !fs_type) { + if (!fs_type) { errno = EFAULT; return -1; } - Syscall::SC_mount_params params { - { target, strlen(target) }, + Syscall::SC_fsopen_params params { { fs_type, strlen(fs_type) }, - source_fd, - flags + flags, }; - int rc = syscall(SC_mount, &params); + int rc = syscall(SC_fsopen, &params); __RETURN_WITH_ERRNO(rc, rc, -1); } +int fsmount(int mount_fd, int source_fd, char const* target) +{ + if (!target) { + errno = EFAULT; + return -1; + } + + Syscall::SC_fsmount_params params { + mount_fd, + { target, strlen(target) }, + source_fd, + }; + int rc = syscall(SC_fsmount, &params); + __RETURN_WITH_ERRNO(rc, rc, -1); +} + +int bindmount(int source_fd, char const* target, int flags) +{ + if (!target) { + errno = EFAULT; + return -1; + } + + Syscall::SC_bindmount_params params { + { target, strlen(target) }, + source_fd, + flags, + }; + int rc = syscall(SC_bindmount, &params); + __RETURN_WITH_ERRNO(rc, rc, -1); +} + +int mount(int source_fd, char const* target, char const* fs_type, int flags) +{ + if (flags & MS_BIND) + return bindmount(source_fd, target, flags); + + int mount_fd = fsopen(fs_type, flags); + if (mount_fd < 0) + return -1; + + int rc = fsmount(mount_fd, source_fd, target); + // NOTE: The mount fd is only needed until fsmount() returns, so close it instead of leaking it, while keeping fsmount()'s errno intact. + int saved_errno = errno; + close(mount_fd); + errno = saved_errno; + return rc; +} + int umount(char const* mountpoint) { int rc = syscall(SC_umount, mountpoint, strlen(mountpoint)); diff --git a/Userland/Libraries/LibC/unistd.h b/Userland/Libraries/LibC/unistd.h index 47e5a2fcdb..ef29f6c969 100644 --- a/Userland/Libraries/LibC/unistd.h +++ b/Userland/Libraries/LibC/unistd.h @@ -117,6 +117,9 @@ int fchown(int fd, uid_t, gid_t); int fchownat(int fd, char const* pathname, uid_t uid, gid_t gid, int flags); int ftruncate(int fd, off_t length); int truncate(char const* path, off_t length); +int fsopen(char const* fs_type, int flags); +int fsmount(int mount_fd, int source_fd, char const* target); +int bindmount(int source_fd, char const* target, int 
flags); int mount(int source_fd, char const* target, char const* fs_type, int flags); int umount(char const* mountpoint); int pledge(char const* promises, char const* execpromises); diff --git a/Userland/Libraries/LibCore/System.cpp b/Userland/Libraries/LibCore/System.cpp index 8feb4ec4c1..7cacceda78 100644 --- a/Userland/Libraries/LibCore/System.cpp +++ b/Userland/Libraries/LibCore/System.cpp @@ -263,14 +263,46 @@ ErrorOr mount(int source_fd, StringView target, StringView fs_type, int fl if (target.is_null() || fs_type.is_null()) return Error::from_errno(EFAULT); - Syscall::SC_mount_params params { - { target.characters_without_null_termination(), target.length() }, + if (flags & MS_REMOUNT) { + TRY(remount(target, flags)); + return {}; + } + if (flags & MS_BIND) { + TRY(bindmount(source_fd, target, flags)); + return {}; + } + int mount_fd = TRY(fsopen(fs_type, flags)); + // NOTE: The mount fd is only needed for the fsmount() call itself, so close it afterwards instead of leaking it. + auto result = fsmount(mount_fd, source_fd, target); + (void)::close(mount_fd); + return result; +} + +ErrorOr fsopen(StringView fs_type, int flags) +{ + if (fs_type.is_null()) + return Error::from_errno(EFAULT); + + Syscall::SC_fsopen_params params { { fs_type.characters_without_null_termination(), fs_type.length() }, - source_fd, - flags + flags, }; - int rc = syscall(SC_mount, &params); - HANDLE_SYSCALL_RETURN_VALUE("mount", rc, {}); + int rc = syscall(SC_fsopen, &params); + HANDLE_SYSCALL_RETURN_VALUE("fsopen", rc, rc); +} + +ErrorOr fsmount(int mount_fd, int source_fd, StringView target) +{ + if (target.is_null()) + return Error::from_errno(EFAULT); + + Syscall::SC_fsmount_params params { + mount_fd, + { target.characters_without_null_termination(), target.length() }, + source_fd, + }; + int rc = syscall(SC_fsmount, &params); + HANDLE_SYSCALL_RETURN_VALUE("fsmount", rc, {}); } ErrorOr umount(StringView mount_point) diff --git a/Userland/Libraries/LibCore/System.h b/Userland/Libraries/LibCore/System.h index 912f2f4941..9527e50af7 100644 --- a/Userland/Libraries/LibCore/System.h +++ b/Userland/Libraries/LibCore/System.h @@ -60,6 +60,8 @@ ErrorOr recvfd(int 
sockfd, int options); ErrorOr ptrace_peekbuf(pid_t tid, void const* tracee_addr, Bytes destination_buf); ErrorOr mount(int source_fd, StringView target, StringView fs_type, int flags); ErrorOr bindmount(int source_fd, StringView target, int flags); +ErrorOr fsopen(StringView fs_type, int flags); +ErrorOr fsmount(int mount_fd, int source_fd, StringView target_path); ErrorOr remount(StringView target, int flags); ErrorOr umount(StringView mount_point); ErrorOr ptrace(int request, pid_t tid, void* address, void* data);