From 16850423cf16183f4304ded1dd135109e4625fe0 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Sat, 25 Dec 2021 23:44:04 +0100 Subject: [PATCH] Kernel: Fix deadlock caused by page faults while holding disk cache lock If the data passed to sys$write() is backed by a not-yet-paged-in inode mapping, we could end up in a situation where we get a page fault when trying to copy data from userspace. If that page fault handler tried reading from an inode that someone else had locked while waiting for the disk cache lock, we'd deadlock. This patch fixes the issue by copying the userspace data into a local buffer before acquiring the disk cache lock. This is not ideal since it incurs an extra copy, and I'm sure we can think of a better solution eventually. This was a frequent cause of startup deadlocks on x86_64 for me. :^) --- Kernel/FileSystem/BlockBasedFileSystem.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Kernel/FileSystem/BlockBasedFileSystem.cpp b/Kernel/FileSystem/BlockBasedFileSystem.cpp index 2b329a7b9b..cf2916d3d1 100644 --- a/Kernel/FileSystem/BlockBasedFileSystem.cpp +++ b/Kernel/FileSystem/BlockBasedFileSystem.cpp @@ -133,6 +133,16 @@ ErrorOr BlockBasedFileSystem::write_block(BlockIndex index, const UserOrKe VERIFY(offset + count <= block_size()); dbgln_if(BBFS_DEBUG, "BlockBasedFileSystem::write_block {}, size={}", index, count); + // NOTE: We copy the `data` to write into a local buffer before taking the cache lock. + // This makes sure any page faults caused by accessing the data will occur before + // we tie down the cache. + auto buffered_data_or_error = ByteBuffer::create_uninitialized(count); + if (!buffered_data_or_error.has_value()) + return ENOMEM; + auto buffered_data = buffered_data_or_error.release_value(); + + TRY(data.read(buffered_data.bytes())); + return m_cache.with_exclusive([&](auto& cache) -> ErrorOr { if (!allow_cache) { flush_specific_block_if_needed(index); @@ -147,7 +157,7 @@ ErrorOr BlockBasedFileSystem::write_block(BlockIndex index, const UserOrKe // Fill the cache first. TRY(read_block(index, nullptr, block_size())); } - TRY(data.read(entry.data + offset, count)); + memcpy(entry.data + offset, buffered_data.data(), count); cache->mark_dirty(entry); entry.has_data = true;