
NVMe: Add shadow doorbell support

The shadow doorbell feature was added to the NVMe spec to improve
the performance of virtual devices.

Typically, ringing a doorbell involves writing to an MMIO register,
which is expensive under QEMU because every such write traps out of
the VM.

The shadow doorbell mechanism lets the device tell the OS whether an
MMIO write is actually needed, so that the write can be skipped when
it is not.
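
To make the mechanism concrete, here is a minimal, self-contained
sketch of the EventIdx check (mmio_required() is an illustrative name,
not Serenity's API; the real implementation is update_shadow_buf() in
NVMeQueue.h below). The guest mirrors each doorbell value into a shadow
buffer and the device publishes an "event index"; an MMIO write is only
needed when the new doorbell value moves past that index:

#include <cstdint>
#include <cstdio>

// True if moving the doorbell from old_value to new_value crosses the
// device's published EventIdx, i.e. the device asked to be notified
// and a real MMIO write is required. The arithmetic wraps modulo 2^16,
// matching NVMe's 16-bit queue indices.
static bool mmio_required(uint16_t new_value, uint16_t old_value, uint16_t event_idx)
{
    return static_cast<uint16_t>(new_value - event_idx - 1)
        < static_cast<uint16_t>(new_value - old_value);
}

int main()
{
    // Device asked to be woken once the doorbell moves past index 5.
    std::printf("%d\n", mmio_required(6, 4, 5)); // 1: crossed EventIdx, ring via MMIO
    std::printf("%d\n", mmio_required(7, 6, 5)); // 0: device already notified, skip MMIO
}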

There is no performance improvement from this support in Serenity at
the moment because the block layer does not batch multiple IOs. Once
command batching support is added to the block layer, shadow doorbell
support can improve performance by avoiding many MMIO writes.

Default to the old MMIO mechanism if the shadow doorbell feature is not
supported.
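
For reference, the negotiation implemented below boils down to: issue
Identify Controller (CNS 0x1), test the Doorbell Buffer Config bit
(0x0100) in OACS, and only then allocate the zeroed shadow and EventIdx
pages and submit the Doorbell Buffer Config admin command (opcode
0x7C). A compilable sketch of just that decision, where
identify_controller_oacs() is a hypothetical stand-in for the real
admin command:

#include <cstdint>
#include <cstdio>

static constexpr uint16_t ID_CTRL_SHADOW_DBBUF_MASK = 0x0100; // OACS bit 8
static constexpr uint8_t OP_ADMIN_DBBUF_CONFIG = 0x7C;

// Hypothetical stand-in: would return the OACS word from an Identify
// Controller (CNS 0x1) admin command.
static uint16_t identify_controller_oacs() { return 0x0100; }

int main()
{
    if (identify_controller_oacs() & ID_CTRL_SHADOW_DBBUF_MASK) {
        // Allocate one zeroed page of shadow doorbells and one of
        // EventIdx values, then hand both to the controller
        // (PRP1 = shadow, PRP2 = eventidx) via Doorbell Buffer Config.
        std::printf("shadow doorbell enabled (opcode 0x%X)\n", OP_ADMIN_DBBUF_CONFIG);
    } else {
        // Optional feature absent: every doorbell update stays an MMIO write.
        std::printf("falling back to plain MMIO doorbells\n");
    }
}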
Pankaj Raghav 2023-08-02 12:41:31 +02:00 committed by Jelle Raaijmakers
parent 5b774f3617
commit 7138395982
5 changed files with 121 additions and 6 deletions

Kernel/Storage/NVMe/NVMeController.cpp

@@ -62,6 +62,7 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::initialize(bool is_queue_polled)
     VERIFY(IO_QUEUE_SIZE < MQES(caps));
     dbgln_if(NVME_DEBUG, "NVMe: IO queue depth is: {}", IO_QUEUE_SIZE);
+    TRY(identify_and_init_controller());
     // Create an IO queue per core
     for (u32 cpuid = 0; cpuid < nr_of_queues; ++cpuid) {
         // qid is zero is used for admin queue
@@ -216,6 +217,65 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::identify_and_init_namespaces()
     return {};
 }

+ErrorOr<void> NVMeController::identify_and_init_controller()
+{
+    RefPtr<Memory::PhysicalPage> prp_dma_buffer;
+    OwnPtr<Memory::Region> prp_dma_region;
+    IdentifyController ctrl {};
+
+    {
+        auto buffer = TRY(MM.allocate_dma_buffer_page("Identify PRP"sv, Memory::Region::Access::ReadWrite, prp_dma_buffer));
+        prp_dma_region = move(buffer);
+    }
+
+    // Check if the controller supports shadow doorbell
+    {
+        NVMeSubmission sub {};
+        u16 status = 0;
+        sub.op = OP_ADMIN_IDENTIFY;
+        sub.identify.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
+        sub.identify.cns = NVMe_CNS_ID_CTRL & 0xff;
+        status = submit_admin_command(sub, true);
+        if (status) {
+            dmesgln_pci(*this, "Failed to identify active namespace command");
+            return EFAULT;
+        }
+        if (void* fault_at; !safe_memcpy(&ctrl, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
+            return EFAULT;
+        }
+    }
+
+    if (ctrl.oacs & ID_CTRL_SHADOW_DBBUF_MASK) {
+        OwnPtr<Memory::Region> dbbuf_dma_region;
+        OwnPtr<Memory::Region> eventidx_dma_region;
+
+        {
+            auto buffer = TRY(MM.allocate_dma_buffer_page("shadow dbbuf"sv, Memory::Region::Access::ReadWrite, m_dbbuf_shadow_page));
+            dbbuf_dma_region = move(buffer);
+            memset(dbbuf_dma_region->vaddr().as_ptr(), 0, PAGE_SIZE);
+        }
+
+        {
+            auto buffer = TRY(MM.allocate_dma_buffer_page("eventidx"sv, Memory::Region::Access::ReadWrite, m_dbbuf_eventidx_page));
+            eventidx_dma_region = move(buffer);
+            memset(eventidx_dma_region->vaddr().as_ptr(), 0, PAGE_SIZE);
+        }
+
+        {
+            NVMeSubmission sub {};
+            sub.op = OP_ADMIN_DBBUF_CONFIG;
+            sub.dbbuf_cmd.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_dbbuf_shadow_page->paddr().as_ptr()));
+            sub.dbbuf_cmd.data_ptr.prp2 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_dbbuf_eventidx_page->paddr().as_ptr()));
+            submit_admin_command(sub, true);
+        }
+        dbgln_if(NVME_DEBUG, "Shadow doorbell Enabled!");
+    }
+
+    return {};
+}
+
 UNMAP_AFTER_INIT Tuple<u64, u8> NVMeController::get_ns_features(IdentifyNamespace& identify_data_struct)
 {
     auto flbas = identify_data_struct.flbas & FLBA_SIZE_MASK;
@@ -283,6 +343,8 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_admin_queue(QueueType queue_type)
     auto doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister volatile>(PhysicalAddress(m_bar + REG_SQ0TDBL_START)));
     Doorbell doorbell = {
         .mmio_reg = move(doorbell_regs),
+        .dbbuf_shadow = {},
+        .dbbuf_eventidx = {},
     };

     m_controller_regs->acq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first()->paddr().as_ptr()));
@@ -352,10 +414,20 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_io_queue(u8 qid, QueueType queue_type)
         submit_admin_command(sub, true);
     }

-    auto queue_doorbell_offset = REG_SQ0TDBL_START + ((2 * qid) * (4 << m_dbl_stride));
-    auto doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister volatile>(PhysicalAddress(m_bar + queue_doorbell_offset)));
+    auto queue_doorbell_offset = (2 * qid) * (4 << m_dbl_stride);
+    auto doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister volatile>(PhysicalAddress(m_bar + REG_SQ0TDBL_START + queue_doorbell_offset)));
+    Memory::TypedMapping<DoorbellRegister> shadow_doorbell_regs {};
+    Memory::TypedMapping<DoorbellRegister> eventidx_doorbell_regs {};
+    if (!m_dbbuf_shadow_page.is_null()) {
+        shadow_doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister>(m_dbbuf_shadow_page->paddr().offset(queue_doorbell_offset)));
+        eventidx_doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister>(m_dbbuf_eventidx_page->paddr().offset(queue_doorbell_offset)));
+    }
     Doorbell doorbell = {
         .mmio_reg = move(doorbell_regs),
+        .dbbuf_shadow = move(shadow_doorbell_regs),
+        .dbbuf_eventidx = move(eventidx_doorbell_regs),
     };

     auto irq = TRY(allocate_irq(qid));

Kernel/Storage/NVMe/NVMeController.h

@@ -58,6 +58,7 @@ private:
     NVMeController(PCI::DeviceIdentifier const&, u32 hardware_relative_controller_id);

     ErrorOr<void> identify_and_init_namespaces();
+    ErrorOr<void> identify_and_init_controller();
     Tuple<u64, u8> get_ns_features(IdentifyNamespace& identify_data_struct);
     ErrorOr<void> create_admin_queue(QueueType queue_type);
     ErrorOr<void> create_io_queue(u8 qid, QueueType queue_type);
@@ -72,6 +73,8 @@ private:
     Vector<NonnullLockRefPtr<NVMeQueue>> m_queues;
     Vector<NonnullLockRefPtr<NVMeNameSpace>> m_namespaces;
     Memory::TypedMapping<ControllerRegister volatile> m_controller_regs;
+    RefPtr<Memory::PhysicalPage> m_dbbuf_shadow_page;
+    RefPtr<Memory::PhysicalPage> m_dbbuf_eventidx_page;
     bool m_admin_queue_ready { false };
     size_t m_device_count { 0 };
     AK::Duration m_ready_timeout;

Kernel/Storage/NVMe/NVMeDefinitions.h

@@ -34,10 +34,21 @@ struct IdentifyNamespace {
     u64 rsvd3[488];
 };

+// FIXME: For now only one value is used. Once we start using
+// more values from id_ctrl command, use separate member variables
+// instead of using rsd array.
+struct IdentifyController {
+    u8 rsdv1[256];
+    u16 oacs;
+    u8 rsdv2[3838];
+};
+
 // DOORBELL
 static constexpr u32 REG_SQ0TDBL_START = 0x1000;
 static constexpr u32 REG_SQ0TDBL_END = 0x1003;
 static constexpr u8 DBL_REG_SIZE = 8;
+static constexpr u16 ID_CTRL_SHADOW_DBBUF_MASK = 0x0100;
 // CAP
 static constexpr u8 CAP_DBL_SHIFT = 32;
 static constexpr u8 CAP_DBL_MASK = 0xf;
@@ -99,8 +110,9 @@ static constexpr u16 IO_QUEUE_SIZE = 64; // TODO:Need to be configurable

 // IDENTIFY
 static constexpr u16 NVMe_IDENTIFY_SIZE = 4096;
-static constexpr u8 NVMe_CNS_ID_ACTIVE_NS = 0x2;
 static constexpr u8 NVMe_CNS_ID_NS = 0x0;
+static constexpr u8 NVMe_CNS_ID_CTRL = 0x1;
+static constexpr u8 NVMe_CNS_ID_ACTIVE_NS = 0x2;
 static constexpr u8 FLBA_SIZE_INDEX = 26;
 static constexpr u8 FLBA_SIZE_MASK = 0xf;
 static constexpr u8 LBA_FORMAT_SUPPORT_INDEX = 128;
@@ -112,6 +124,7 @@ enum AdminCommandOpCode {
     OP_ADMIN_CREATE_COMPLETION_QUEUE = 0x5,
     OP_ADMIN_CREATE_SUBMISSION_QUEUE = 0x1,
     OP_ADMIN_IDENTIFY = 0x6,
+    OP_ADMIN_DBBUF_CONFIG = 0x7C,
 };

 // IO opcodes
@@ -202,6 +215,12 @@ struct [[gnu::packed]] NVMeCreateSQCmd {
     u64 rsvd12[2];
 };

+struct [[gnu::packed]] NVMeDBBUFCmd {
+    u32 rsvd1[5];
+    struct DataPtr data_ptr;
+    u32 rsvd12[6];
+};
+
 struct [[gnu::packed]] NVMeSubmission {
     u8 op;
     u8 flags;
@@ -212,5 +231,6 @@ struct [[gnu::packed]] NVMeSubmission {
         NVMeRWCmd rw;
         NVMeCreateCQCmd create_cq;
         NVMeCreateSQCmd create_sq;
+        NVMeDBBUFCmd dbbuf_cmd;
     };
 };

Kernel/Storage/NVMe/NVMeQueue.cpp

@@ -101,7 +101,6 @@ void NVMeQueue::submit_sqe(NVMeSubmission& sub)
     }

     dbgln_if(NVME_DEBUG, "NVMe: Submission with command identifier {}. SQ_TAIL: {}", sub.cmdid, m_sq_tail);
-    full_memory_barrier();
     update_sq_doorbell();
 }

Kernel/Storage/NVMe/NVMeQueue.h

@@ -28,6 +28,8 @@ struct DoorbellRegister {

 struct Doorbell {
     Memory::TypedMapping<DoorbellRegister volatile> mmio_reg;
+    Memory::TypedMapping<DoorbellRegister> dbbuf_shadow;
+    Memory::TypedMapping<DoorbellRegister> dbbuf_eventidx;
 };

 enum class QueueType {
@@ -62,9 +64,25 @@ public:
 protected:
     u32 process_cq();

+    // Updates the shadow buffer and returns if mmio is needed
+    bool update_shadow_buf(u16 new_value, u32* dbbuf, u32* ei)
+    {
+        u32 const old = *dbbuf;
+        *dbbuf = new_value;
+        AK::full_memory_barrier();
+        bool need_mmio = static_cast<u16>(new_value - *ei - 1) < static_cast<u16>(new_value - old);
+        return need_mmio;
+    }
+
     void update_sq_doorbell()
     {
-        m_db_regs.mmio_reg->sq_tail = m_sq_tail;
+        full_memory_barrier();
+        if (m_db_regs.dbbuf_shadow.paddr.is_null()
+            || update_shadow_buf(m_sq_tail, &m_db_regs.dbbuf_shadow->sq_tail, &m_db_regs.dbbuf_eventidx->sq_tail))
+            m_db_regs.mmio_reg->sq_tail = m_sq_tail;
     }

     NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Doorbell db_regs);
@@ -88,7 +106,10 @@ private:
     virtual void complete_current_request(u16 cmdid, u16 status) = 0;
     void update_cq_doorbell()
     {
-        m_db_regs.mmio_reg->cq_head = m_cq_head;
+        full_memory_barrier();
+        if (m_db_regs.dbbuf_shadow.paddr.is_null()
+            || update_shadow_buf(m_cq_head, &m_db_regs.dbbuf_shadow->cq_head, &m_db_regs.dbbuf_eventidx->cq_head))
+            m_db_regs.mmio_reg->cq_head = m_cq_head;
     }

 protected: