1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 20:27:45 +00:00

Kernel: Implement O_DIRECT open() flag to bypass disk caches

Files opened with O_DIRECT will now bypass the disk cache in read/write
operations (though metadata operations will still hit the disk cache.)

This will allow us to test actual disk performance instead of testing
disk *cache* performance, if that's what we want. :^)

There's room for improvement here: we're very aggressively flushing any
dirty cache entries for the specific block before reading/writing that
block. This is done by walking the entire cache, which may be slow.
This commit is contained in:
Andreas Kling 2019-11-05 19:35:12 +01:00
parent 3de3daf765
commit 59ed235c85
7 changed files with 59 additions and 18 deletions

View file

@ -1,5 +1,6 @@
#include <Kernel/Arch/i386/CPU.h> #include <Kernel/Arch/i386/CPU.h>
#include <Kernel/FileSystem/DiskBackedFileSystem.h> #include <Kernel/FileSystem/DiskBackedFileSystem.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/KBuffer.h> #include <Kernel/KBuffer.h>
#include <Kernel/Process.h> #include <Kernel/Process.h>
@ -92,11 +93,21 @@ DiskBackedFS::~DiskBackedFS()
{ {
} }
bool DiskBackedFS::write_block(unsigned index, const u8* data) bool DiskBackedFS::write_block(unsigned index, const u8* data, FileDescription* description)
{ {
#ifdef DBFS_DEBUG #ifdef DBFS_DEBUG
kprintf("DiskBackedFileSystem::write_block %u, size=%u\n", index, data.size()); kprintf("DiskBackedFileSystem::write_block %u, size=%u\n", index, data.size());
#endif #endif
bool allow_cache = !description || !description->is_direct();
if (!allow_cache) {
flush_specific_block_if_needed(index);
DiskOffset base_offset = static_cast<DiskOffset>(index) * static_cast<DiskOffset>(block_size());
device().write(base_offset, block_size(), data);
return true;
}
auto& entry = cache().get(index); auto& entry = cache().get(index);
memcpy(entry.data, data, block_size()); memcpy(entry.data, data, block_size());
entry.is_dirty = true; entry.is_dirty = true;
@ -106,22 +117,32 @@ bool DiskBackedFS::write_block(unsigned index, const u8* data)
return true; return true;
} }
bool DiskBackedFS::write_blocks(unsigned index, unsigned count, const u8* data) bool DiskBackedFS::write_blocks(unsigned index, unsigned count, const u8* data, FileDescription* description)
{ {
#ifdef DBFS_DEBUG #ifdef DBFS_DEBUG
kprintf("DiskBackedFileSystem::write_blocks %u x%u\n", index, count); kprintf("DiskBackedFileSystem::write_blocks %u x%u\n", index, count);
#endif #endif
for (unsigned i = 0; i < count; ++i) for (unsigned i = 0; i < count; ++i)
write_block(index + i, data + i * block_size()); write_block(index + i, data + i * block_size(), description);
return true; return true;
} }
bool DiskBackedFS::read_block(unsigned index, u8* buffer) const bool DiskBackedFS::read_block(unsigned index, u8* buffer, FileDescription* description) const
{ {
#ifdef DBFS_DEBUG #ifdef DBFS_DEBUG
kprintf("DiskBackedFileSystem::read_block %u\n", index); kprintf("DiskBackedFileSystem::read_block %u\n", index);
#endif #endif
bool allow_cache = !description || !description->is_direct();
if (!allow_cache) {
const_cast<DiskBackedFS*>(this)->flush_specific_block_if_needed(index);
DiskOffset base_offset = static_cast<DiskOffset>(index) * static_cast<DiskOffset>(block_size());
bool success = device().read(base_offset, block_size(), buffer);
ASSERT(success);
return true;
}
auto& entry = cache().get(index); auto& entry = cache().get(index);
if (!entry.has_data) { if (!entry.has_data) {
DiskOffset base_offset = static_cast<DiskOffset>(index) * static_cast<DiskOffset>(block_size()); DiskOffset base_offset = static_cast<DiskOffset>(index) * static_cast<DiskOffset>(block_size());
@ -133,16 +154,16 @@ bool DiskBackedFS::read_block(unsigned index, u8* buffer) const
return true; return true;
} }
bool DiskBackedFS::read_blocks(unsigned index, unsigned count, u8* buffer) const bool DiskBackedFS::read_blocks(unsigned index, unsigned count, u8* buffer, FileDescription* description) const
{ {
if (!count) if (!count)
return false; return false;
if (count == 1) if (count == 1)
return read_block(index, buffer); return read_block(index, buffer, description);
u8* out = buffer; u8* out = buffer;
for (unsigned i = 0; i < count; ++i) { for (unsigned i = 0; i < count; ++i) {
if (!read_block(index + i, out)) if (!read_block(index + i, out, description))
return false; return false;
out += block_size(); out += block_size();
} }
@ -150,6 +171,20 @@ bool DiskBackedFS::read_blocks(unsigned index, unsigned count, u8* buffer) const
return true; return true;
} }
// Writes the cached copy of block `index` back to the device if that cache
// entry is marked dirty, so a following uncached (O_DIRECT) read or write of
// the same block does not race with stale cached data.
//
// Takes m_lock via LOCKER for the duration of the call. When the cache reports
// that nothing is dirty, returns without touching the device. Otherwise every
// cache entry is visited; only the matching entry's own is_dirty flag is
// cleared here.
void DiskBackedFS::flush_specific_block_if_needed(unsigned index)
{
    LOCKER(m_lock);

    // Fast path: nothing in the cache is pending write-back.
    if (!cache().is_dirty())
        return;

    cache().for_each_entry([&](CacheEntry& entry) {
        // Skip clean entries and entries that belong to other blocks.
        if (!entry.is_dirty || entry.block_index != index)
            return;

        // Byte offset of this block on the underlying device.
        DiskOffset base_offset = static_cast<DiskOffset>(entry.block_index) * static_cast<DiskOffset>(block_size());
        device().write(base_offset, block_size(), entry.data);
        entry.is_dirty = false;
    });
}
void DiskBackedFS::flush_writes_impl() void DiskBackedFS::flush_writes_impl()
{ {
LOCKER(m_lock); LOCKER(m_lock);

View file

@ -21,14 +21,15 @@ public:
protected: protected:
explicit DiskBackedFS(NonnullRefPtr<DiskDevice>&&); explicit DiskBackedFS(NonnullRefPtr<DiskDevice>&&);
bool read_block(unsigned index, u8* buffer) const; bool read_block(unsigned index, u8* buffer, FileDescription* = nullptr) const;
bool read_blocks(unsigned index, unsigned count, u8* buffer) const; bool read_blocks(unsigned index, unsigned count, u8* buffer, FileDescription* = nullptr) const;
bool write_block(unsigned index, const u8*); bool write_block(unsigned index, const u8*, FileDescription* = nullptr);
bool write_blocks(unsigned index, unsigned count, const u8*); bool write_blocks(unsigned index, unsigned count, const u8*, FileDescription* = nullptr);
private: private:
DiskCache& cache() const; DiskCache& cache() const;
void flush_specific_block_if_needed(unsigned index);
NonnullRefPtr<DiskDevice> m_device; NonnullRefPtr<DiskDevice> m_device;
mutable OwnPtr<DiskCache> m_cache; mutable OwnPtr<DiskCache> m_cache;

View file

@ -2,6 +2,7 @@
#include <AK/BufferStream.h> #include <AK/BufferStream.h>
#include <AK/StdLibExtras.h> #include <AK/StdLibExtras.h>
#include <Kernel/FileSystem/Ext2FileSystem.h> #include <Kernel/FileSystem/Ext2FileSystem.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/FileSystem/ext2_fs.h> #include <Kernel/FileSystem/ext2_fs.h>
#include <Kernel/Process.h> #include <Kernel/Process.h>
#include <Kernel/RTC.h> #include <Kernel/RTC.h>
@ -90,7 +91,6 @@ bool Ext2FS::initialize()
kprintf("ext2fs: desc size = %u\n", EXT2_DESC_SIZE(&super_block)); kprintf("ext2fs: desc size = %u\n", EXT2_DESC_SIZE(&super_block));
#endif #endif
set_block_size(EXT2_BLOCK_SIZE(&super_block)); set_block_size(EXT2_BLOCK_SIZE(&super_block));
ASSERT(block_size() <= (int)max_block_size); ASSERT(block_size() <= (int)max_block_size);
@ -610,7 +610,7 @@ RefPtr<Inode> Ext2FS::get_inode(InodeIdentifier inode) const
return new_inode; return new_inode;
} }
ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, u8* buffer, FileDescription*) const ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, u8* buffer, FileDescription* description) const
{ {
Locker inode_locker(m_lock); Locker inode_locker(m_lock);
ASSERT(offset >= 0); ASSERT(offset >= 0);
@ -656,7 +656,7 @@ ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, u8* buffer, FileDes
u8 block[max_block_size]; u8 block[max_block_size];
for (int bi = first_block_logical_index; remaining_count && bi <= last_block_logical_index; ++bi) { for (int bi = first_block_logical_index; remaining_count && bi <= last_block_logical_index; ++bi) {
bool success = fs().read_block(m_block_list[bi], block); bool success = fs().read_block(m_block_list[bi], block, description);
if (!success) { if (!success) {
kprintf("ext2fs: read_bytes: read_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi); kprintf("ext2fs: read_bytes: read_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi);
return -EIO; return -EIO;
@ -694,7 +694,6 @@ KResult Ext2FSInode::resize(u64 new_size)
return KResult(-ENOSPC); return KResult(-ENOSPC);
} }
auto block_list = fs().block_list_for_inode(m_raw_inode); auto block_list = fs().block_list_for_inode(m_raw_inode);
if (blocks_needed_after > blocks_needed_before) { if (blocks_needed_after > blocks_needed_before) {
auto new_blocks = fs().allocate_blocks(fs().group_index_from_inode(index()), blocks_needed_after - blocks_needed_before); auto new_blocks = fs().allocate_blocks(fs().group_index_from_inode(index()), blocks_needed_after - blocks_needed_before);
@ -723,7 +722,7 @@ KResult Ext2FSInode::resize(u64 new_size)
return KSuccess; return KSuccess;
} }
ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, FileDescription*) ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, FileDescription* description)
{ {
ASSERT(offset >= 0); ASSERT(offset >= 0);
ASSERT(count >= 0); ASSERT(count >= 0);
@ -785,7 +784,7 @@ ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, Fi
ByteBuffer block; ByteBuffer block;
if (offset_into_block != 0 || num_bytes_to_copy != block_size) { if (offset_into_block != 0 || num_bytes_to_copy != block_size) {
block = ByteBuffer::create_uninitialized(block_size); block = ByteBuffer::create_uninitialized(block_size);
bool success = fs().read_block(m_block_list[bi], block.data()); bool success = fs().read_block(m_block_list[bi], block.data(), description);
if (!success) { if (!success) {
kprintf("Ext2FSInode::write_bytes: read_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi); kprintf("Ext2FSInode::write_bytes: read_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi);
return -EIO; return -EIO;
@ -805,7 +804,7 @@ ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, Fi
#ifdef EXT2_DEBUG #ifdef EXT2_DEBUG
dbgprintf("Ext2FSInode::write_bytes: writing block %u (offset_into_block: %u)\n", m_block_list[bi], offset_into_block); dbgprintf("Ext2FSInode::write_bytes: writing block %u (offset_into_block: %u)\n", m_block_list[bi], offset_into_block);
#endif #endif
bool success = fs().write_block(m_block_list[bi], block.data()); bool success = fs().write_block(m_block_list[bi], block.data(), description);
if (!success) { if (!success) {
kprintf("Ext2FSInode::write_bytes: write_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi); kprintf("Ext2FSInode::write_bytes: write_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi);
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();

View file

@ -293,6 +293,7 @@ void FileDescription::set_file_flags(u32 flags)
{ {
m_is_blocking = !(flags & O_NONBLOCK); m_is_blocking = !(flags & O_NONBLOCK);
m_should_append = flags & O_APPEND; m_should_append = flags & O_APPEND;
m_direct = flags & O_DIRECT;
m_file_flags = flags; m_file_flags = flags;
} }

View file

@ -44,6 +44,8 @@ public:
String absolute_path() const; String absolute_path() const;
bool is_direct() const { return m_direct; }
bool is_directory() const { return m_is_directory; } bool is_directory() const { return m_is_directory; }
File& file() { return *m_file; } File& file() { return *m_file; }
@ -117,5 +119,6 @@ private:
bool m_is_blocking { true }; bool m_is_blocking { true };
bool m_is_directory { false }; bool m_is_directory { false };
bool m_should_append { false }; bool m_should_append { false };
bool m_direct { false };
FIFO::Direction m_fifo_direction { FIFO::Direction::Neither }; FIFO::Direction m_fifo_direction { FIFO::Direction::Neither };
}; };

View file

@ -24,6 +24,7 @@
#define O_DIRECTORY 00200000 #define O_DIRECTORY 00200000
#define O_NOFOLLOW 00400000 #define O_NOFOLLOW 00400000
#define O_CLOEXEC 02000000 #define O_CLOEXEC 02000000
#define O_DIRECT 04000000
#define O_NOFOLLOW_NOERROR 0x4000000 #define O_NOFOLLOW_NOERROR 0x4000000
class Custody; class Custody;

View file

@ -25,6 +25,7 @@ __BEGIN_DECLS
#define O_DIRECTORY 00200000 #define O_DIRECTORY 00200000
#define O_NOFOLLOW 00400000 #define O_NOFOLLOW 00400000
#define O_CLOEXEC 02000000 #define O_CLOEXEC 02000000
#define O_DIRECT 04000000
#define S_IFMT 0170000 #define S_IFMT 0170000
#define S_IFDIR 0040000 #define S_IFDIR 0040000