From 7138395982bb3a47d87c20ad32211c77f05fcc2c Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Wed, 2 Aug 2023 12:41:31 +0200 Subject: [PATCH] NVMe: Add shadow doorbell support The shadow doorbell feature was added to the NVMe spec to improve the performance of virtual devices. Typically, ringing a doorbell means writing to an MMIO register, which is expensive under QEMU because every such write traps out of the VM. The shadow doorbell mechanism lets the virtual controller tell the OS when an MMIO write is actually needed, so the write can be skipped when it is not. There is no performance improvement from this support in Serenity at the moment because the block layer does not batch multiple IOs. Once command batching support is added to the block layer, shadow doorbell support can improve performance by avoiding many MMIO writes. Fall back to the old MMIO mechanism if shadow doorbell is not supported. --- .../Devices/Storage/NVMe/NVMeController.cpp | 76 ++++++++++++++++++- Kernel/Devices/Storage/NVMe/NVMeController.h | 3 + Kernel/Devices/Storage/NVMe/NVMeDefinitions.h | 22 +++++- Kernel/Devices/Storage/NVMe/NVMeQueue.cpp | 1 - Kernel/Devices/Storage/NVMe/NVMeQueue.h | 25 +++++- 5 files changed, 121 insertions(+), 6 deletions(-) diff --git a/Kernel/Devices/Storage/NVMe/NVMeController.cpp b/Kernel/Devices/Storage/NVMe/NVMeController.cpp index 3d7f893c80..0511c1f79a 100644 --- a/Kernel/Devices/Storage/NVMe/NVMeController.cpp +++ b/Kernel/Devices/Storage/NVMe/NVMeController.cpp @@ -62,6 +62,7 @@ UNMAP_AFTER_INIT ErrorOr NVMeController::initialize(bool is_queue_polled) VERIFY(IO_QUEUE_SIZE < MQES(caps)); dbgln_if(NVME_DEBUG, "NVMe: IO queue depth is: {}", IO_QUEUE_SIZE); + TRY(identify_and_init_controller()); // Create an IO queue per core for (u32 cpuid = 0; cpuid < nr_of_queues; ++cpuid) { // qid is zero is used for admin queue @@ -216,6 +217,65 @@ UNMAP_AFTER_INIT ErrorOr NVMeController::identify_and_init_namespaces() return {}; } +ErrorOr
NVMeController::identify_and_init_controller()
+{
+    // Issues Identify with CNS = NVMe_CNS_ID_CTRL and, when the OACS field has
+    // ID_CTRL_SHADOW_DBBUF_MASK set, hands a shadow doorbell page and an event index
+    // page to the controller with OP_ADMIN_DBBUF_CONFIG.
+    // On a successful return m_dbbuf_shadow_page / m_dbbuf_eventidx_page are non-null
+    // only if the controller accepted that command; create_io_queue() keys off the
+    // shadow page being non-null to decide whether to map per-queue shadow doorbells.
+    RefPtr prp_dma_buffer;
+    OwnPtr prp_dma_region;
+    IdentifyController ctrl {};
+
+    {
+        auto buffer = TRY(MM.allocate_dma_buffer_page("Identify PRP"sv, Memory::Region::Access::ReadWrite, prp_dma_buffer));
+        prp_dma_region = move(buffer);
+    }
+
+    // Check if the controller supports shadow doorbell
+    {
+        NVMeSubmission sub {};
+        u16 status = 0;
+        sub.op = OP_ADMIN_IDENTIFY;
+        sub.identify.data_ptr.prp1 = reinterpret_cast(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
+        sub.identify.cns = NVMe_CNS_ID_CTRL & 0xff;
+        status = submit_admin_command(sub, true);
+        if (status) {
+            dmesgln_pci(*this, "Failed to identify controller");
+            return EFAULT;
+        }
+        // Copy the identify data out of the DMA page before inspecting it.
+        if (void* fault_at; !safe_memcpy(&ctrl, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
+            return EFAULT;
+        }
+    }
+
+    if (ctrl.oacs & ID_CTRL_SHADOW_DBBUF_MASK) {
+        // These regions are only needed to zero the pages; the pages themselves are
+        // held by the m_dbbuf_* members.
+        OwnPtr dbbuf_dma_region;
+        OwnPtr eventidx_dma_region;
+
+        {
+            auto buffer = TRY(MM.allocate_dma_buffer_page("shadow dbbuf"sv, Memory::Region::Access::ReadWrite, m_dbbuf_shadow_page));
+            dbbuf_dma_region = move(buffer);
+            memset(dbbuf_dma_region->vaddr().as_ptr(), 0, PAGE_SIZE);
+        }
+
+        {
+            auto buffer = TRY(MM.allocate_dma_buffer_page("eventidx"sv, Memory::Region::Access::ReadWrite, m_dbbuf_eventidx_page));
+            eventidx_dma_region = move(buffer);
+            memset(eventidx_dma_region->vaddr().as_ptr(), 0, PAGE_SIZE);
+        }
+
+        {
+            NVMeSubmission sub {};
+            sub.op = OP_ADMIN_DBBUF_CONFIG;
+            sub.dbbuf_cmd.data_ptr.prp1 = reinterpret_cast(AK::convert_between_host_and_little_endian(m_dbbuf_shadow_page->paddr().as_ptr()));
+            sub.dbbuf_cmd.data_ptr.prp2 = reinterpret_cast(AK::convert_between_host_and_little_endian(m_dbbuf_eventidx_page->paddr().as_ptr()));
+
+            // If the controller rejects the command it will never look at these pages,
+            // so relying on them would skip MMIO doorbell writes it is still waiting for.
+            // Drop the pages and keep using plain MMIO doorbells instead.
+            if (submit_admin_command(sub, true)) {
+                dmesgln_pci(*this, "Failed to configure shadow doorbell buffers, using MMIO doorbells");
+                m_dbbuf_shadow_page = nullptr;
+                m_dbbuf_eventidx_page = nullptr;
+                return {};
+            }
+        }
+
+        dbgln_if(NVME_DEBUG, "Shadow doorbell Enabled!");
+    }
+
+    return {};
+}
+
 UNMAP_AFTER_INIT Tuple NVMeController::get_ns_features(IdentifyNamespace& identify_data_struct)
 {
auto flbas = identify_data_struct.flbas & FLBA_SIZE_MASK; @@ -283,6 +343,8 @@ UNMAP_AFTER_INIT ErrorOr NVMeController::create_admin_queue(QueueType queu auto doorbell_regs = TRY(Memory::map_typed_writable(PhysicalAddress(m_bar + REG_SQ0TDBL_START))); Doorbell doorbell = { .mmio_reg = move(doorbell_regs), + .dbbuf_shadow = {}, + .dbbuf_eventidx = {}, }; m_controller_regs->acq = reinterpret_cast(AK::convert_between_host_and_little_endian(cq_dma_pages.first()->paddr().as_ptr())); @@ -352,10 +414,20 @@ UNMAP_AFTER_INIT ErrorOr NVMeController::create_io_queue(u8 qid, QueueType submit_admin_command(sub, true); } - auto queue_doorbell_offset = REG_SQ0TDBL_START + ((2 * qid) * (4 << m_dbl_stride)); - auto doorbell_regs = TRY(Memory::map_typed_writable(PhysicalAddress(m_bar + queue_doorbell_offset))); + auto queue_doorbell_offset = (2 * qid) * (4 << m_dbl_stride); + auto doorbell_regs = TRY(Memory::map_typed_writable(PhysicalAddress(m_bar + REG_SQ0TDBL_START + queue_doorbell_offset))); + Memory::TypedMapping shadow_doorbell_regs {}; + Memory::TypedMapping eventidx_doorbell_regs {}; + + if (!m_dbbuf_shadow_page.is_null()) { + shadow_doorbell_regs = TRY(Memory::map_typed_writable(m_dbbuf_shadow_page->paddr().offset(queue_doorbell_offset))); + eventidx_doorbell_regs = TRY(Memory::map_typed_writable(m_dbbuf_eventidx_page->paddr().offset(queue_doorbell_offset))); + } + Doorbell doorbell = { .mmio_reg = move(doorbell_regs), + .dbbuf_shadow = move(shadow_doorbell_regs), + .dbbuf_eventidx = move(eventidx_doorbell_regs), }; auto irq = TRY(allocate_irq(qid)); diff --git a/Kernel/Devices/Storage/NVMe/NVMeController.h b/Kernel/Devices/Storage/NVMe/NVMeController.h index ac41104264..5c209e22cf 100644 --- a/Kernel/Devices/Storage/NVMe/NVMeController.h +++ b/Kernel/Devices/Storage/NVMe/NVMeController.h @@ -58,6 +58,7 @@ private: NVMeController(PCI::DeviceIdentifier const&, u32 hardware_relative_controller_id); ErrorOr identify_and_init_namespaces(); + ErrorOr 
identify_and_init_controller(); Tuple get_ns_features(IdentifyNamespace& identify_data_struct); ErrorOr create_admin_queue(QueueType queue_type); ErrorOr create_io_queue(u8 qid, QueueType queue_type); @@ -72,6 +73,8 @@ private: Vector> m_queues; Vector> m_namespaces; Memory::TypedMapping m_controller_regs; + // Pages passed to the controller as prp1/prp2 of OP_ADMIN_DBBUF_CONFIG; create_io_queue() only maps them when the shadow page is non-null. + RefPtr m_dbbuf_shadow_page; + RefPtr m_dbbuf_eventidx_page; bool m_admin_queue_ready { false }; size_t m_device_count { 0 }; AK::Duration m_ready_timeout; diff --git a/Kernel/Devices/Storage/NVMe/NVMeDefinitions.h b/Kernel/Devices/Storage/NVMe/NVMeDefinitions.h index 249a089576..92999fb212 100644 --- a/Kernel/Devices/Storage/NVMe/NVMeDefinitions.h +++ b/Kernel/Devices/Storage/NVMe/NVMeDefinitions.h @@ -34,10 +34,21 @@ struct IdentifyNamespace { u64 rsvd3[488]; }; +// Layout of the data copied out after an Identify command with CNS = NVMe_CNS_ID_CTRL. +// Only `oacs` is named: it follows 256 padding bytes, and the members add up to +// 4096 bytes (NVMe_IDENTIFY_SIZE). +// FIXME: For now only one value is used. Once we start using +// more values from the id_ctrl command, use separate member variables +// instead of the rsdv padding arrays. +struct IdentifyController { + u8 rsdv1[256]; + u16 oacs; + u8 rsdv2[3838]; +}; + // DOORBELL static constexpr u32 REG_SQ0TDBL_START = 0x1000; static constexpr u32 REG_SQ0TDBL_END = 0x1003; static constexpr u8 DBL_REG_SIZE = 8; +// Bit of IdentifyController::oacs that is tested before shadow doorbells are set up. +static constexpr u16 ID_CTRL_SHADOW_DBBUF_MASK = 0x0100; + // CAP static constexpr u8 CAP_DBL_SHIFT = 32; static constexpr u8 CAP_DBL_MASK = 0xf; @@ -99,8 +110,9 @@ static constexpr u16 IO_QUEUE_SIZE = 64; // TODO:Need to be configurable // IDENTIFY static constexpr u16 NVMe_IDENTIFY_SIZE = 4096; -static constexpr u8 NVMe_CNS_ID_ACTIVE_NS = 0x2; static constexpr u8 NVMe_CNS_ID_NS = 0x0; +static constexpr u8 NVMe_CNS_ID_CTRL = 0x1; +static constexpr u8 NVMe_CNS_ID_ACTIVE_NS = 0x2; static constexpr u8 FLBA_SIZE_INDEX = 26; static constexpr u8 FLBA_SIZE_MASK = 0xf; static constexpr u8 LBA_FORMAT_SUPPORT_INDEX = 128; @@ -112,6 +124,7 @@ enum AdminCommandOpCode { OP_ADMIN_CREATE_COMPLETION_QUEUE = 0x5, OP_ADMIN_CREATE_SUBMISSION_QUEUE = 0x1, OP_ADMIN_IDENTIFY = 0x6, + OP_ADMIN_DBBUF_CONFIG = 0x7C, }; // IO opcodes @@
-202,6 +215,12 @@ struct [[gnu::packed]] NVMeCreateSQCmd { u64 rsvd12[2]; }; +// Command-specific part of an OP_ADMIN_DBBUF_CONFIG submission: the controller code puts +// the shadow doorbell page in data_ptr.prp1 and the event index page in data_ptr.prp2. +struct [[gnu::packed]] NVMeDBBUFCmd { + u32 rsvd1[5]; + struct DataPtr data_ptr; + u32 rsvd12[6]; +}; + struct [[gnu::packed]] NVMeSubmission { u8 op; u8 flags; @@ -212,5 +231,6 @@ struct [[gnu::packed]] NVMeSubmission { NVMeRWCmd rw; NVMeCreateCQCmd create_cq; NVMeCreateSQCmd create_sq; + NVMeDBBUFCmd dbbuf_cmd; }; }; diff --git a/Kernel/Devices/Storage/NVMe/NVMeQueue.cpp b/Kernel/Devices/Storage/NVMe/NVMeQueue.cpp index 0d7772a7e4..8383cdebdc 100644 --- a/Kernel/Devices/Storage/NVMe/NVMeQueue.cpp +++ b/Kernel/Devices/Storage/NVMe/NVMeQueue.cpp @@ -101,7 +101,6 @@ void NVMeQueue::submit_sqe(NVMeSubmission& sub) } dbgln_if(NVME_DEBUG, "NVMe: Submission with command identifier {}. SQ_TAIL: {}", sub.cmdid, m_sq_tail); - full_memory_barrier(); update_sq_doorbell(); } diff --git a/Kernel/Devices/Storage/NVMe/NVMeQueue.h b/Kernel/Devices/Storage/NVMe/NVMeQueue.h index df05047684..a3c23a4072 100644 --- a/Kernel/Devices/Storage/NVMe/NVMeQueue.h +++ b/Kernel/Devices/Storage/NVMe/NVMeQueue.h @@ -28,6 +28,8 @@ struct DoorbellRegister { struct Doorbell { Memory::TypedMapping mmio_reg; + // Per-queue views into the shadow doorbell and event index pages; left default-constructed when shadow doorbells are not in use. + Memory::TypedMapping dbbuf_shadow; + Memory::TypedMapping dbbuf_eventidx; }; enum class QueueType { @@ -62,9 +64,25 @@ public: protected: u32 process_cq(); + + // Stores new_value into the shadow doorbell slot *dbbuf and returns whether the + // MMIO doorbell still has to be written. + // The comparison uses wrap-around unsigned arithmetic: MMIO is requested when the + // event index *ei lies in [old, new_value), i.e. this update moved past it. + bool update_shadow_buf(u16 new_value, u32* dbbuf, u32* ei) + { + u32 const old = *dbbuf; + + *dbbuf = new_value; + // Barrier between the shadow store above and the event index load below. + AK::full_memory_barrier(); + + bool need_mmio = static_cast(new_value - *ei - 1) < static_cast(new_value - old); + return need_mmio; + } + void update_sq_doorbell() { - m_db_regs.mmio_reg->sq_tail = m_sq_tail; + // A null shadow mapping means plain MMIO doorbells; otherwise ring MMIO only when update_shadow_buf() asks for it. + full_memory_barrier(); + if (m_db_regs.dbbuf_shadow.paddr.is_null() + || update_shadow_buf(m_sq_tail, &m_db_regs.dbbuf_shadow->sq_tail, &m_db_regs.dbbuf_eventidx->sq_tail)) + m_db_regs.mmio_reg->sq_tail = m_sq_tail; } NVMeQueue(NonnullOwnPtr rw_dma_region, Memory::PhysicalPage
const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr cq_dma_region, OwnPtr sq_dma_region, Doorbell db_regs); @@ -88,7 +106,10 @@ private: virtual void complete_current_request(u16 cmdid, u16 status) = 0; void update_cq_doorbell() { - m_db_regs.mmio_reg->cq_head = m_cq_head; + // Publish the new completion queue head: write the MMIO doorbell when there is no shadow mapping (null paddr) or when update_shadow_buf() reports that MMIO is needed. + full_memory_barrier(); + if (m_db_regs.dbbuf_shadow.paddr.is_null() + || update_shadow_buf(m_cq_head, &m_db_regs.dbbuf_shadow->cq_head, &m_db_regs.dbbuf_eventidx->cq_head)) + m_db_regs.mmio_reg->cq_head = m_cq_head; } protected: