
Kernel: Move the Storage directory to be a new directory under Devices

The Storage subsystem, like the Audio and HID subsystems, exposes Unix
device files (for example, in the /dev directory). To ensure consistency
across the repository, we should make the Storage subsystem reside in
the Kernel/Devices directory, like the other two subsystems mentioned above.
This commit is contained in:
Liav A 2023-03-18 13:32:12 +02:00 committed by Jelle Raaijmakers
parent f3a58f3a5a
commit 500b7b08d6
59 changed files with 133 additions and 133 deletions
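The change is a pure path move: code that previously included these headers from Kernel/Storage now picks them up under Kernel/Devices/Storage. A minimal sketch of the resulting include change (the old prefix is inferred from the commit description and is not shown in this excerpt):

// Old location (inferred):
// #include <Kernel/Storage/NVMe/NVMeController.h>
// New location, as used throughout the diffs below:
#include <Kernel/Devices/Storage/NVMe/NVMeController.h>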

Kernel/Devices/Storage/NVMe/NVMeController.cpp

@@ -0,0 +1,360 @@
/*
* Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
* Copyright (c) 2022, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Format.h>
#include <AK/Types.h>
#include <Kernel/Arch/Delay.h>
#include <Kernel/Arch/Interrupts.h>
#include <Kernel/Arch/SafeMem.h>
#include <Kernel/Bus/PCI/API.h>
#include <Kernel/CommandLine.h>
#include <Kernel/Devices/Device.h>
#include <Kernel/Devices/Storage/NVMe/NVMeController.h>
#include <Kernel/Devices/Storage/StorageManagement.h>
#include <Kernel/Library/LockRefPtr.h>
#include <Kernel/Sections.h>
namespace Kernel {
UNMAP_AFTER_INIT ErrorOr<NonnullRefPtr<NVMeController>> NVMeController::try_initialize(Kernel::PCI::DeviceIdentifier const& device_identifier, bool is_queue_polled)
{
auto controller = TRY(adopt_nonnull_ref_or_enomem(new NVMeController(device_identifier, StorageManagement::generate_relative_nvme_controller_id({}))));
TRY(controller->initialize(is_queue_polled));
return controller;
}
UNMAP_AFTER_INIT NVMeController::NVMeController(const PCI::DeviceIdentifier& device_identifier, u32 hardware_relative_controller_id)
: PCI::Device(const_cast<PCI::DeviceIdentifier&>(device_identifier))
, StorageController(hardware_relative_controller_id)
{
}
UNMAP_AFTER_INIT ErrorOr<void> NVMeController::initialize(bool is_queue_polled)
{
// Nr of queues = one queue per core
auto nr_of_queues = Processor::count();
auto queue_type = is_queue_polled ? QueueType::Polled : QueueType::IRQ;
PCI::enable_memory_space(device_identifier());
PCI::enable_bus_mastering(device_identifier());
m_bar = PCI::get_BAR0(device_identifier()) & PCI::bar_address_mask;
static_assert(sizeof(ControllerRegister) == REG_SQ0TDBL_START);
static_assert(sizeof(NVMeSubmission) == (1 << SQ_WIDTH));
// Map only up to the doorbell registers for the controller;
// each queue will map its own doorbell registers separately
m_controller_regs = TRY(Memory::map_typed_writable<ControllerRegister volatile>(PhysicalAddress(m_bar)));
auto caps = m_controller_regs->cap;
m_ready_timeout = Duration::from_milliseconds((CAP_TO(caps) + 1) * 500); // CAP.TO is in 500ms units
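// CAP.DSTRD encodes the spacing between successive doorbell registers as 2^(2 + DSTRD) bytes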
calculate_doorbell_stride();
// IO queues + 1 admin queue
m_irq_type = TRY(reserve_irqs(nr_of_queues + 1, true));
TRY(create_admin_queue(queue_type));
VERIFY(m_admin_queue_ready == true);
VERIFY(IO_QUEUE_SIZE < MQES(caps));
dbgln_if(NVME_DEBUG, "NVMe: IO queue depth is: {}", IO_QUEUE_SIZE);
// Create an IO queue per core
for (u32 cpuid = 0; cpuid < nr_of_queues; ++cpuid) {
// qid zero is reserved for the admin queue
TRY(create_io_queue(cpuid + 1, queue_type));
}
TRY(identify_and_init_namespaces());
return {};
}
bool NVMeController::wait_for_ready(bool expected_ready_bit_value)
{
constexpr size_t one_ms_io_delay = 1000;
auto wait_iterations = m_ready_timeout.to_milliseconds();
u32 expected_rdy = expected_ready_bit_value ? 1 : 0;
while (((m_controller_regs->csts >> CSTS_RDY_BIT) & 0x1) != expected_rdy) {
microseconds_delay(one_ms_io_delay);
if (--wait_iterations == 0) {
if (((m_controller_regs->csts >> CSTS_RDY_BIT) & 0x1) != expected_rdy) {
dbgln_if(NVME_DEBUG, "NVMEController: CSTS.RDY still not set to {} after {} ms", expected_rdy, m_ready_timeout.to_milliseconds());
return false;
}
break;
}
}
return true;
}
ErrorOr<void> NVMeController::reset_controller()
{
if ((m_controller_regs->cc & (1 << CC_EN_BIT)) != 0) {
// If the EN bit is already set, we need to wait
// until the RDY bit is 1, otherwise the behavior is undefined
if (!wait_for_ready(true))
return Error::from_errno(ETIMEDOUT);
}
auto cc = m_controller_regs->cc;
cc = cc & ~(1 << CC_EN_BIT);
m_controller_regs->cc = cc;
full_memory_barrier();
// Wait until the RDY bit is cleared
if (!wait_for_ready(false))
return Error::from_errno(ETIMEDOUT);
return {};
}
ErrorOr<void> NVMeController::start_controller()
{
if (!(m_controller_regs->cc & (1 << CC_EN_BIT))) {
// If the EN bit is not already set, we need to wait
// until the RDY bit is 0, otherwise the behavior is undefined
if (!wait_for_ready(false))
return Error::from_errno(ETIMEDOUT);
}
auto cc = m_controller_regs->cc;
cc = cc | (1 << CC_EN_BIT);
cc = cc | (CQ_WIDTH << CC_IOCQES_BIT);
cc = cc | (SQ_WIDTH << CC_IOSQES_BIT);
m_controller_regs->cc = cc;
full_memory_barrier();
// Wait until the RDY bit is set
if (!wait_for_ready(true))
return Error::from_errno(ETIMEDOUT);
return {};
}
UNMAP_AFTER_INIT u32 NVMeController::get_admin_q_dept()
{
u32 aqa = m_controller_regs->aqa;
// Queue depth is 0 based
u32 q_depth = min(ACQ_SIZE(aqa), ASQ_SIZE(aqa)) + 1;
dbgln_if(NVME_DEBUG, "NVMe: Admin queue depth is {}", q_depth);
return q_depth;
}
UNMAP_AFTER_INIT ErrorOr<void> NVMeController::identify_and_init_namespaces()
{
RefPtr<Memory::PhysicalPage> prp_dma_buffer;
OwnPtr<Memory::Region> prp_dma_region;
auto namespace_data_struct = TRY(ByteBuffer::create_zeroed(NVMe_IDENTIFY_SIZE));
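// CNS 0x02 (active namespace ID list) fills a 4 KiB buffer with up to 1024 32-bit NSIDs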
u32 active_namespace_list[NVMe_IDENTIFY_SIZE / sizeof(u32)];
{
auto buffer = TRY(MM.allocate_dma_buffer_page("Identify PRP"sv, Memory::Region::Access::ReadWrite, prp_dma_buffer));
prp_dma_region = move(buffer);
}
// Get the active namespace
{
NVMeSubmission sub {};
u16 status = 0;
sub.op = OP_ADMIN_IDENTIFY;
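// PRP1 points the controller at the single DMA page that receives the Identify data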
sub.identify.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
sub.identify.cns = NVMe_CNS_ID_ACTIVE_NS & 0xff;
status = submit_admin_command(sub, true);
if (status) {
dmesgln_pci(*this, "Failed to identify active namespace command");
return EFAULT;
}
if (void* fault_at; !safe_memcpy(active_namespace_list, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
return EFAULT;
}
}
// Get the NAMESPACE attributes
{
NVMeSubmission sub {};
IdentifyNamespace id_ns {};
u16 status = 0;
for (auto nsid : active_namespace_list) {
memset(prp_dma_region->vaddr().as_ptr(), 0, NVMe_IDENTIFY_SIZE);
// Invalid NS
if (nsid == 0)
break;
sub.op = OP_ADMIN_IDENTIFY;
sub.identify.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
sub.identify.cns = NVMe_CNS_ID_NS & 0xff;
sub.identify.nsid = nsid;
status = submit_admin_command(sub, true);
if (status) {
dmesgln_pci(*this, "Failed identify namespace with nsid {}", nsid);
return EFAULT;
}
static_assert(sizeof(IdentifyNamespace) == NVMe_IDENTIFY_SIZE);
if (void* fault_at; !safe_memcpy(&id_ns, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
return EFAULT;
}
auto val = get_ns_features(id_ns);
auto block_counts = val.get<0>();
auto block_size = 1 << val.get<1>();
dbgln_if(NVME_DEBUG, "NVMe: Block count is {} and Block size is {}", block_counts, block_size);
m_namespaces.append(TRY(NVMeNameSpace::try_create(*this, m_queues, nsid, block_counts, block_size)));
m_device_count++;
dbgln_if(NVME_DEBUG, "NVMe: Initialized namespace with NSID: {}", nsid);
}
}
return {};
}
UNMAP_AFTER_INIT Tuple<u64, u8> NVMeController::get_ns_features(IdentifyNamespace& identify_data_struct)
{
auto flbas = identify_data_struct.flbas & FLBA_SIZE_MASK;
auto namespace_size = identify_data_struct.nsze;
auto lba_format = identify_data_struct.lbaf[flbas];
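// LBADS (bits 23:16 of the selected LBA format) is the block size as a power of two; the caller expands it with 1 << lba_size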
auto lba_size = (lba_format & LBA_SIZE_MASK) >> 16;
return Tuple<u64, u8>(namespace_size, lba_size);
}
LockRefPtr<StorageDevice> NVMeController::device(u32 index) const
{
return m_namespaces.at(index);
}
size_t NVMeController::devices_count() const
{
return m_device_count;
}
ErrorOr<void> NVMeController::reset()
{
TRY(reset_controller());
TRY(start_controller());
return {};
}
ErrorOr<void> NVMeController::shutdown()
{
return Error::from_errno(ENOTIMPL);
}
void NVMeController::complete_current_request([[maybe_unused]] AsyncDeviceRequest::RequestResult result)
{
VERIFY_NOT_REACHED();
}
UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_admin_queue(QueueType queue_type)
{
auto qdepth = get_admin_q_dept();
OwnPtr<Memory::Region> cq_dma_region;
Vector<NonnullRefPtr<Memory::PhysicalPage>> cq_dma_pages;
OwnPtr<Memory::Region> sq_dma_region;
Vector<NonnullRefPtr<Memory::PhysicalPage>> sq_dma_pages;
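// Ring sizes are rounded up to a multiple of the page size so they map onto page-granular DMA buffers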
auto cq_size = round_up_to_power_of_two(CQ_SIZE(qdepth), 4096);
auto sq_size = round_up_to_power_of_two(SQ_SIZE(qdepth), 4096);
auto maybe_error = reset_controller();
if (maybe_error.is_error()) {
dmesgln_pci(*this, "Failed to reset the NVMe controller");
return maybe_error;
}
{
auto buffer = TRY(MM.allocate_dma_buffer_pages(cq_size, "Admin CQ queue"sv, Memory::Region::Access::ReadWrite, cq_dma_pages));
cq_dma_region = move(buffer);
}
// Phase bit is important to determine completion, so zero out the space
// so that we don't get any garbage phase bit value
memset(cq_dma_region->vaddr().as_ptr(), 0, cq_size);
{
auto buffer = TRY(MM.allocate_dma_buffer_pages(sq_size, "Admin SQ queue"sv, Memory::Region::Access::ReadWrite, sq_dma_pages));
sq_dma_region = move(buffer);
}
auto doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister volatile>(PhysicalAddress(m_bar + REG_SQ0TDBL_START)));
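// Program ACQ/ASQ with the physical base addresses of the admin completion and submission rings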
m_controller_regs->acq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first()->paddr().as_ptr()));
m_controller_regs->asq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first()->paddr().as_ptr()));
auto irq = TRY(allocate_irq(0)); // Admin queue always uses the 0th index when using MSIx
maybe_error = start_controller();
if (maybe_error.is_error()) {
dmesgln_pci(*this, "Failed to restart the NVMe controller");
return maybe_error;
}
set_admin_queue_ready_flag();
m_admin_queue = TRY(NVMeQueue::try_create(*this, 0, irq, qdepth, move(cq_dma_region), move(sq_dma_region), move(doorbell_regs), queue_type));
dbgln_if(NVME_DEBUG, "NVMe: Admin queue created");
return {};
}
UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_io_queue(u8 qid, QueueType queue_type)
{
OwnPtr<Memory::Region> cq_dma_region;
Vector<NonnullRefPtr<Memory::PhysicalPage>> cq_dma_pages;
OwnPtr<Memory::Region> sq_dma_region;
Vector<NonnullRefPtr<Memory::PhysicalPage>> sq_dma_pages;
auto cq_size = round_up_to_power_of_two(CQ_SIZE(IO_QUEUE_SIZE), 4096);
auto sq_size = round_up_to_power_of_two(SQ_SIZE(IO_QUEUE_SIZE), 4096);
{
auto buffer = TRY(MM.allocate_dma_buffer_pages(cq_size, "IO CQ queue"sv, Memory::Region::Access::ReadWrite, cq_dma_pages));
cq_dma_region = move(buffer);
}
// Phase bit is important to determine completion, so zero out the space
// so that we don't get any garbage phase bit value
memset(cq_dma_region->vaddr().as_ptr(), 0, cq_size);
{
auto buffer = TRY(MM.allocate_dma_buffer_pages(sq_size, "IO SQ queue"sv, Memory::Region::Access::ReadWrite, sq_dma_pages));
sq_dma_region = move(buffer);
}
{
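// The completion queue must be created before the submission queue that references it (create_sq.cqid below)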
NVMeSubmission sub {};
sub.op = OP_ADMIN_CREATE_COMPLETION_QUEUE;
sub.create_cq.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first()->paddr().as_ptr()));
sub.create_cq.cqid = qid;
// The queue size is 0 based
sub.create_cq.qsize = AK::convert_between_host_and_little_endian(IO_QUEUE_SIZE - 1);
auto flags = (queue_type == QueueType::IRQ) ? QUEUE_IRQ_ENABLED : QUEUE_IRQ_DISABLED;
flags |= QUEUE_PHY_CONTIGUOUS;
// When using MSIx interrupts, qid is used as an index into the interrupt table
sub.create_cq.irq_vector = (m_irq_type == PCI::InterruptType::PIN) ? 0 : qid;
sub.create_cq.cq_flags = AK::convert_between_host_and_little_endian(flags & 0xFFFF);
submit_admin_command(sub, true);
}
{
NVMeSubmission sub {};
sub.op = OP_ADMIN_CREATE_SUBMISSION_QUEUE;
sub.create_sq.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first()->paddr().as_ptr()));
sub.create_sq.sqid = qid;
// The queue size is 0 based
sub.create_sq.qsize = AK::convert_between_host_and_little_endian(IO_QUEUE_SIZE - 1);
auto flags = QUEUE_PHY_CONTIGUOUS;
sub.create_sq.cqid = qid;
sub.create_sq.sq_flags = AK::convert_between_host_and_little_endian(flags);
submit_admin_command(sub, true);
}
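// Each queue pair owns an SQ-tail/CQ-head doorbell pair, offset from SQ0TDBL by 2 * qid doorbell strides (4 << CAP.DSTRD bytes each)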
auto queue_doorbell_offset = REG_SQ0TDBL_START + ((2 * qid) * (4 << m_dbl_stride));
auto doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister volatile>(PhysicalAddress(m_bar + queue_doorbell_offset)));
auto irq = TRY(allocate_irq(qid));
m_queues.append(TRY(NVMeQueue::try_create(*this, qid, irq, IO_QUEUE_SIZE, move(cq_dma_region), move(sq_dma_region), move(doorbell_regs), queue_type)));
dbgln_if(NVME_DEBUG, "NVMe: Created IO Queue with QID{}", m_queues.size());
return {};
}
}

Kernel/Devices/Storage/NVMe/NVMeController.h

@@ -0,0 +1,84 @@
/*
* Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/OwnPtr.h>
#include <AK/Time.h>
#include <AK/Tuple.h>
#include <AK/Types.h>
#include <Kernel/Bus/PCI/Device.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Devices/Storage/NVMe/NVMeNameSpace.h>
#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
#include <Kernel/Devices/Storage/StorageController.h>
#include <Kernel/Library/LockRefPtr.h>
#include <Kernel/Library/NonnullLockRefPtr.h>
#include <Kernel/Locking/Spinlock.h>
#include <Kernel/Memory/TypedMapping.h>
namespace Kernel {
class NVMeController : public PCI::Device
, public StorageController {
public:
static ErrorOr<NonnullRefPtr<NVMeController>> try_initialize(PCI::DeviceIdentifier const&, bool is_queue_polled);
ErrorOr<void> initialize(bool is_queue_polled);
LockRefPtr<StorageDevice> device(u32 index) const override;
size_t devices_count() const override;
virtual StringView device_name() const override { return "NVMeController"sv; }
protected:
ErrorOr<void> reset() override;
ErrorOr<void> shutdown() override;
void complete_current_request(AsyncDeviceRequest::RequestResult result) override;
public:
ErrorOr<void> reset_controller();
ErrorOr<void> start_controller();
u32 get_admin_q_dept();
u16 submit_admin_command(NVMeSubmission& sub, bool sync = false)
{
// First queue is always the admin queue
if (sync) {
return m_admin_queue->submit_sync_sqe(sub);
}
m_admin_queue->submit_sqe(sub);
return 0;
}
bool is_admin_queue_ready() { return m_admin_queue_ready; };
void set_admin_queue_ready_flag() { m_admin_queue_ready = true; };
private:
NVMeController(PCI::DeviceIdentifier const&, u32 hardware_relative_controller_id);
ErrorOr<void> identify_and_init_namespaces();
Tuple<u64, u8> get_ns_features(IdentifyNamespace& identify_data_struct);
ErrorOr<void> create_admin_queue(QueueType queue_type);
ErrorOr<void> create_io_queue(u8 qid, QueueType queue_type);
void calculate_doorbell_stride()
{
m_dbl_stride = (m_controller_regs->cap >> CAP_DBL_SHIFT) & CAP_DBL_MASK;
}
bool wait_for_ready(bool);
private:
LockRefPtr<NVMeQueue> m_admin_queue;
Vector<NonnullLockRefPtr<NVMeQueue>> m_queues;
Vector<NonnullLockRefPtr<NVMeNameSpace>> m_namespaces;
Memory::TypedMapping<ControllerRegister volatile> m_controller_regs;
bool m_admin_queue_ready { false };
size_t m_device_count { 0 };
AK::Duration m_ready_timeout;
u32 m_bar { 0 };
u8 m_dbl_stride { 0 };
PCI::InterruptType m_irq_type;
QueueType m_queue_type { QueueType::IRQ };
static Atomic<u8> s_controller_id;
};
}

Kernel/Devices/Storage/NVMe/NVMeDefinitions.h

@@ -0,0 +1,216 @@
/*
* Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Endian.h>
#include <AK/Types.h>
struct ControllerRegister {
u64 cap;
u32 vs;
u32 intms;
u32 intmc;
u32 cc;
u32 rsvd1;
u32 csts;
u32 nssr;
u32 aqa;
u64 asq;
u64 acq;
u64 rsvd2[505];
};
struct IdentifyNamespace {
u64 nsze;
u64 ncap;
u8 rsdv1[10];
u8 flbas;
u8 rsvd2[100];
u32 lbaf[16];
u64 rsvd3[488];
};
// DOORBELL
static constexpr u32 REG_SQ0TDBL_START = 0x1000;
static constexpr u32 REG_SQ0TDBL_END = 0x1003;
static constexpr u8 DBL_REG_SIZE = 8;
// CAP
static constexpr u8 CAP_DBL_SHIFT = 32;
static constexpr u8 CAP_DBL_MASK = 0xf;
static constexpr u8 CAP_TO_SHIFT = 24;
static constexpr u64 CAP_TO_MASK = 0xffULL << CAP_TO_SHIFT;
static constexpr u16 MQES(u64 cap)
{
return (cap & 0xffff) + 1;
}
static constexpr u32 CAP_TO(u64 cap)
{
return (cap & CAP_TO_MASK) >> CAP_TO_SHIFT;
}
// CC Controller Configuration
static constexpr u8 CC_EN_BIT = 0x0;
static constexpr u8 CSTS_RDY_BIT = 0x0;
static constexpr u8 CSTS_SHST_SHIFT = 2;
static constexpr u32 CSTS_SHST_MASK = 0x3 << CSTS_SHST_SHIFT;
static constexpr u8 CC_IOSQES_BIT = 16;
static constexpr u8 CC_IOCQES_BIT = 20;
static constexpr u32 CSTS_SHST(u32 x)
{
return (x & CSTS_SHST_MASK) >> CSTS_SHST_SHIFT;
}
static constexpr u16 CC_AQA_MASK = (0xfff);
static constexpr u16 ACQ_SIZE(u32 x)
{
return (x >> 16) & CC_AQA_MASK;
}
static constexpr u16 ASQ_SIZE(u32 x)
{
return x & CC_AQA_MASK;
}
static constexpr u8 CQ_WIDTH = 4; // A CQ entry is 16 bytes (2^4) in size.
static constexpr u8 SQ_WIDTH = 6; // An SQ entry is 64 bytes (2^6) in size.
static constexpr u16 CQ_SIZE(u16 q_depth)
{
return q_depth << CQ_WIDTH;
}
static constexpr u16 SQ_SIZE(u16 q_depth)
{
return q_depth << SQ_WIDTH;
}
static constexpr u8 PHASE_TAG(u16 x)
{
return x & 0x1;
}
static constexpr u16 CQ_STATUS_FIELD_MASK = 0xfffe;
static constexpr u16 CQ_STATUS_FIELD(u16 x)
{
return (x & CQ_STATUS_FIELD_MASK) >> 1;
}
static constexpr u16 IO_QUEUE_SIZE = 64; // TODO: Needs to be configurable
// IDENTIFY
static constexpr u16 NVMe_IDENTIFY_SIZE = 4096;
static constexpr u8 NVMe_CNS_ID_ACTIVE_NS = 0x2;
static constexpr u8 NVMe_CNS_ID_NS = 0x0;
static constexpr u8 FLBA_SIZE_INDEX = 26;
static constexpr u8 FLBA_SIZE_MASK = 0xf;
static constexpr u8 LBA_FORMAT_SUPPORT_INDEX = 128;
static constexpr u32 LBA_SIZE_MASK = 0x00ff0000;
// OPCODES
// ADMIN COMMAND SET
enum AdminCommandOpCode {
OP_ADMIN_CREATE_COMPLETION_QUEUE = 0x5,
OP_ADMIN_CREATE_SUBMISSION_QUEUE = 0x1,
OP_ADMIN_IDENTIFY = 0x6,
};
// IO opcodes
enum IOCommandOpcode {
OP_NVME_WRITE = 0x1,
OP_NVME_READ = 0x2
};
// FLAGS
static constexpr u8 QUEUE_PHY_CONTIGUOUS = (1 << 0);
static constexpr u8 QUEUE_IRQ_ENABLED = (1 << 1);
static constexpr u8 QUEUE_IRQ_DISABLED = (0 << 1);
struct [[gnu::packed]] NVMeCompletion {
LittleEndian<u32> cmd_spec;
LittleEndian<u32> res;
LittleEndian<u16> sq_head; /* how much of this queue may be reclaimed */
LittleEndian<u16> sq_id; /* submission queue that generated this entry */
u16 command_id; /* of the command which completed */
LittleEndian<u16> status; /* did the command fail, and if so, why? */
};
struct [[gnu::packed]] DataPtr {
LittleEndian<u64> prp1;
LittleEndian<u64> prp2;
};
struct [[gnu::packed]] NVMeGenericCmd {
LittleEndian<u32> nsid;
LittleEndian<u64> rsvd;
LittleEndian<u64> metadata;
struct DataPtr data_ptr;
LittleEndian<u32> cdw10;
LittleEndian<u32> cdw11;
LittleEndian<u32> cdw12;
LittleEndian<u32> cdw13;
LittleEndian<u32> cdw14;
LittleEndian<u32> cdw15;
};
struct [[gnu::packed]] NVMeRWCmd {
LittleEndian<u32> nsid;
LittleEndian<u64> rsvd;
LittleEndian<u64> metadata;
struct DataPtr data_ptr;
LittleEndian<u64> slba;
LittleEndian<u16> length;
LittleEndian<u16> control;
LittleEndian<u32> dsmgmt;
LittleEndian<u32> reftag;
LittleEndian<u16> apptag;
LittleEndian<u16> appmask;
};
struct [[gnu::packed]] NVMeIdentifyCmd {
LittleEndian<u32> nsid;
LittleEndian<u64> rsvd1[2];
struct DataPtr data_ptr;
u8 cns;
u8 rsvd2;
LittleEndian<u16> ctrlid;
u8 rsvd3[3];
u8 csi;
u64 rsvd4[2];
};
struct [[gnu::packed]] NVMeCreateCQCmd {
u32 rsvd1[5];
LittleEndian<u64> prp1;
u64 rsvd2;
LittleEndian<u16> cqid;
LittleEndian<u16> qsize;
LittleEndian<u16> cq_flags;
LittleEndian<u16> irq_vector;
u64 rsvd12[2];
};
struct [[gnu::packed]] NVMeCreateSQCmd {
u32 rsvd1[5];
LittleEndian<u64> prp1;
u64 rsvd2;
LittleEndian<u16> sqid;
LittleEndian<u16> qsize;
LittleEndian<u16> sq_flags;
LittleEndian<u16> cqid;
u64 rsvd12[2];
};
struct [[gnu::packed]] NVMeSubmission {
u8 op;
u8 flags;
LittleEndian<u16> cmdid;
union [[gnu::packed]] {
NVMeGenericCmd generic;
NVMeIdentifyCmd identify;
NVMeRWCmd rw;
NVMeCreateCQCmd create_cq;
NVMeCreateSQCmd create_sq;
};
};

Kernel/Devices/Storage/NVMe/NVMeInterruptQueue.cpp

@@ -0,0 +1,93 @@
/*
* Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Devices/BlockDevice.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Devices/Storage/NVMe/NVMeInterruptQueue.h>
#include <Kernel/WorkQueue.h>
namespace Kernel {
ErrorOr<NonnullLockRefPtr<NVMeInterruptQueue>> NVMeInterruptQueue::try_create(PCI::Device& device, NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
{
auto queue = TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) NVMeInterruptQueue(device, move(rw_dma_region), rw_dma_page, qid, irq, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs))));
queue->initialize_interrupt_queue();
return queue;
}
UNMAP_AFTER_INIT NVMeInterruptQueue::NVMeInterruptQueue(PCI::Device& device, NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
: NVMeQueue(move(rw_dma_region), rw_dma_page, qid, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs))
, PCIIRQHandler(device, irq)
{
}
void NVMeInterruptQueue::initialize_interrupt_queue()
{
enable_irq();
}
bool NVMeInterruptQueue::handle_irq(RegisterState const&)
{
SpinlockLocker lock(m_request_lock);
return process_cq() ? true : false;
}
void NVMeInterruptQueue::submit_sqe(NVMeSubmission& sub)
{
NVMeQueue::submit_sqe(sub);
}
void NVMeInterruptQueue::complete_current_request(u16 cmdid, u16 status)
{
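// Defer completion handling to the IO work queue so the buffer copy and request completion don't run in IRQ context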
auto work_item_creation_result = g_io_work->try_queue([this, cmdid, status]() {
SpinlockLocker lock(m_request_lock);
auto& request_pdu = m_requests.get(cmdid).release_value();
auto current_request = request_pdu.request;
AsyncDeviceRequest::RequestResult req_result = AsyncDeviceRequest::Success;
ScopeGuard guard = [req_result, status, &request_pdu, &lock] {
// FIXME: We should unlock at the end of this function to make sure no new request is inserted
// before we complete the request and call end_io_handler, but that results in a deadlock.
// For now this is avoided by asserting on the `used` field while inserting.
lock.unlock();
if (request_pdu.request)
request_pdu.request->complete(req_result);
if (request_pdu.end_io_handler)
request_pdu.end_io_handler(status);
request_pdu.used = false;
};
// There can be submissions without any request associated with them, such as
// admin queue commands during init. If there is no request, we are done
if (!current_request)
return;
if (status) {
req_result = AsyncBlockDeviceRequest::Failure;
return;
}
if (current_request->request_type() == AsyncBlockDeviceRequest::RequestType::Read) {
if (auto result = current_request->write_to_buffer(current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), current_request->buffer_size()); result.is_error()) {
req_result = AsyncBlockDeviceRequest::MemoryFault;
return;
}
}
return;
});
if (work_item_creation_result.is_error()) {
SpinlockLocker lock(m_request_lock);
auto& request_pdu = m_requests.get(cmdid).release_value();
auto current_request = request_pdu.request;
current_request->complete(AsyncDeviceRequest::OutOfMemory);
if (request_pdu.end_io_handler)
request_pdu.end_io_handler(status);
request_pdu.used = false;
}
}
}

Kernel/Devices/Storage/NVMe/NVMeInterruptQueue.h

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
#include <Kernel/Interrupts/PCIIRQHandler.h>
namespace Kernel {
class NVMeInterruptQueue : public NVMeQueue
, public PCIIRQHandler {
public:
static ErrorOr<NonnullLockRefPtr<NVMeInterruptQueue>> try_create(PCI::Device& device, NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);
void submit_sqe(NVMeSubmission& submission) override;
virtual ~NVMeInterruptQueue() override {};
virtual StringView purpose() const override { return "NVMe"sv; };
void initialize_interrupt_queue();
protected:
NVMeInterruptQueue(PCI::Device& device, NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);
private:
virtual void complete_current_request(u16 cmdid, u16 status) override;
bool handle_irq(RegisterState const&) override;
};
}

Kernel/Devices/Storage/NVMe/NVMeNameSpace.cpp

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/NonnullOwnPtr.h>
#include <Kernel/Devices/DeviceManagement.h>
#include <Kernel/Devices/Storage/NVMe/NVMeController.h>
#include <Kernel/Devices/Storage/NVMe/NVMeNameSpace.h>
#include <Kernel/Devices/Storage/StorageManagement.h>
namespace Kernel {
UNMAP_AFTER_INIT ErrorOr<NonnullLockRefPtr<NVMeNameSpace>> NVMeNameSpace::try_create(NVMeController const& controller, Vector<NonnullLockRefPtr<NVMeQueue>> queues, u16 nsid, size_t storage_size, size_t lba_size)
{
auto device = TRY(DeviceManagement::try_create_device<NVMeNameSpace>(StorageDevice::LUNAddress { controller.controller_id(), nsid, 0 }, controller.hardware_relative_controller_id(), move(queues), storage_size, lba_size, nsid));
return device;
}
UNMAP_AFTER_INIT NVMeNameSpace::NVMeNameSpace(LUNAddress logical_unit_number_address, u32 hardware_relative_controller_id, Vector<NonnullLockRefPtr<NVMeQueue>> queues, size_t max_addresable_block, size_t lba_size, u16 nsid)
: StorageDevice(logical_unit_number_address, hardware_relative_controller_id, lba_size, max_addresable_block)
, m_nsid(nsid)
, m_queues(move(queues))
{
}
void NVMeNameSpace::start_request(AsyncBlockDeviceRequest& request)
{
auto index = Processor::current_id();
auto& queue = m_queues.at(index);
// TODO: For now we only support IO transfers of up to PAGE_SIZE (going along with the current constraint in the block layer).
// Eventually remove this constraint by using the PRP2 field in the submission struct and lifting the block layer constraint for the NVMe driver.
VERIFY(request.block_count() <= (PAGE_SIZE / block_size()));
if (request.request_type() == AsyncBlockDeviceRequest::Read) {
queue->read(request, m_nsid, request.block_index(), request.block_count());
} else {
queue->write(request, m_nsid, request.block_index(), request.block_count());
}
}
}

Kernel/Devices/Storage/NVMe/NVMeNameSpace.h

@@ -0,0 +1,38 @@
/*
* Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/OwnPtr.h>
#include <AK/Types.h>
#include <AK/kmalloc.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
#include <Kernel/Devices/Storage/StorageDevice.h>
#include <Kernel/Library/LockRefPtr.h>
#include <Kernel/Library/NonnullLockRefPtr.h>
#include <Kernel/Locking/Spinlock.h>
namespace Kernel {
class NVMeController;
class NVMeNameSpace : public StorageDevice {
friend class DeviceManagement;
public:
static ErrorOr<NonnullLockRefPtr<NVMeNameSpace>> try_create(NVMeController const&, Vector<NonnullLockRefPtr<NVMeQueue>> queues, u16 nsid, size_t storage_size, size_t lba_size);
CommandSet command_set() const override { return CommandSet::NVMe; };
void start_request(AsyncBlockDeviceRequest& request) override;
private:
NVMeNameSpace(LUNAddress, u32 hardware_relative_controller_id, Vector<NonnullLockRefPtr<NVMeQueue>> queues, size_t storage_size, size_t lba_size, u16 nsid);
u16 m_nsid;
Vector<NonnullLockRefPtr<NVMeQueue>> m_queues;
};
}

Kernel/Devices/Storage/NVMe/NVMePollQueue.cpp

@@ -0,0 +1,67 @@
/*
* Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Arch/Delay.h>
#include <Kernel/Devices/BlockDevice.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Devices/Storage/NVMe/NVMePollQueue.h>
namespace Kernel {
ErrorOr<NonnullLockRefPtr<NVMePollQueue>> NVMePollQueue::try_create(NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
{
return TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) NVMePollQueue(move(rw_dma_region), rw_dma_page, qid, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs))));
}
UNMAP_AFTER_INIT NVMePollQueue::NVMePollQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
: NVMeQueue(move(rw_dma_region), rw_dma_page, qid, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs))
{
}
void NVMePollQueue::submit_sqe(NVMeSubmission& sub)
{
NVMeQueue::submit_sqe(sub);
SpinlockLocker lock_cq(m_cq_lock);
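// No interrupt is wired up for this queue, so spin on the completion queue until the entry shows up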
while (!process_cq()) {
microseconds_delay(1);
}
}
void NVMePollQueue::complete_current_request(u16 cmdid, u16 status)
{
SpinlockLocker lock(m_request_lock);
auto& request_pdu = m_requests.get(cmdid).release_value();
auto current_request = request_pdu.request;
AsyncDeviceRequest::RequestResult req_result = AsyncDeviceRequest::Success;
ScopeGuard guard = [req_result, status, &request_pdu] {
if (request_pdu.request)
request_pdu.request->complete(req_result);
if (request_pdu.end_io_handler)
request_pdu.end_io_handler(status);
request_pdu.used = false;
};
// There can be submissions without any request associated with them, such as
// admin queue commands during init. If there is no request, we are done
if (!current_request)
return;
if (status) {
req_result = AsyncBlockDeviceRequest::Failure;
return;
}
if (current_request->request_type() == AsyncBlockDeviceRequest::RequestType::Read) {
if (auto result = current_request->write_to_buffer(current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), current_request->buffer_size()); result.is_error()) {
req_result = AsyncBlockDeviceRequest::MemoryFault;
return;
}
}
return;
}
}

Kernel/Devices/Storage/NVMe/NVMePollQueue.h

@@ -0,0 +1,25 @@
/*
* Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
namespace Kernel {
class NVMePollQueue : public NVMeQueue {
public:
static ErrorOr<NonnullLockRefPtr<NVMePollQueue>> try_create(NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);
void submit_sqe(NVMeSubmission& submission) override;
virtual ~NVMePollQueue() override {};
protected:
NVMePollQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);
private:
virtual void complete_current_request(u16 cmdid, u16 status) override;
};
}

Kernel/Devices/Storage/NVMe/NVMeQueue.cpp

@@ -0,0 +1,181 @@
/*
* Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Arch/Delay.h>
#include <Kernel/Devices/Storage/NVMe/NVMeController.h>
#include <Kernel/Devices/Storage/NVMe/NVMeInterruptQueue.h>
#include <Kernel/Devices/Storage/NVMe/NVMePollQueue.h>
#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
#include <Kernel/StdLib.h>
namespace Kernel {
ErrorOr<NonnullLockRefPtr<NVMeQueue>> NVMeQueue::try_create(NVMeController& device, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs, QueueType queue_type)
{
// Note: Allocate a DMA region for read/write operations. For now requests don't exceed 4096 bytes (the storage device layer takes care of that)
RefPtr<Memory::PhysicalPage> rw_dma_page;
auto rw_dma_region = TRY(MM.allocate_dma_buffer_page("NVMe Queue Read/Write DMA"sv, Memory::Region::Access::ReadWrite, rw_dma_page));
if (rw_dma_page.is_null())
return ENOMEM;
if (queue_type == QueueType::Polled) {
auto queue = NVMePollQueue::try_create(move(rw_dma_region), rw_dma_page.release_nonnull(), qid, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs));
return queue;
}
auto queue = NVMeInterruptQueue::try_create(device, move(rw_dma_region), rw_dma_page.release_nonnull(), qid, irq, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs));
return queue;
}
UNMAP_AFTER_INIT NVMeQueue::NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
: m_rw_dma_region(move(rw_dma_region))
, m_qid(qid)
, m_admin_queue(qid == 0)
, m_qdepth(q_depth)
, m_cq_dma_region(move(cq_dma_region))
, m_sq_dma_region(move(sq_dma_region))
, m_db_regs(move(db_regs))
, m_rw_dma_page(rw_dma_page)
{
m_requests.try_ensure_capacity(q_depth).release_value_but_fixme_should_propagate_errors();
m_sqe_array = { reinterpret_cast<NVMeSubmission*>(m_sq_dma_region->vaddr().as_ptr()), m_qdepth };
m_cqe_array = { reinterpret_cast<NVMeCompletion*>(m_cq_dma_region->vaddr().as_ptr()), m_qdepth };
}
bool NVMeQueue::cqe_available()
{
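// A completion entry is valid once its phase tag matches the phase we expect; the controller inverts the tag on every pass over the ring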
return PHASE_TAG(m_cqe_array[m_cq_head].status) == m_cq_valid_phase;
}
void NVMeQueue::update_cqe_head()
{
// To prevent overflow, use a temp variable
u32 temp_cq_head = m_cq_head + 1;
if (temp_cq_head == m_qdepth) {
m_cq_head = 0;
m_cq_valid_phase ^= 1;
} else {
m_cq_head = temp_cq_head;
}
}
u32 NVMeQueue::process_cq()
{
u32 nr_of_processed_cqes = 0;
while (cqe_available()) {
u16 status;
u16 cmdid;
++nr_of_processed_cqes;
status = CQ_STATUS_FIELD(m_cqe_array[m_cq_head].status);
cmdid = m_cqe_array[m_cq_head].command_id;
dbgln_if(NVME_DEBUG, "NVMe: Completion with status {:x} and command identifier {}. CQ_HEAD: {}", status, cmdid, m_cq_head);
if (!m_requests.contains(cmdid)) {
dmesgln("Bogus cmd id: {}", cmdid);
VERIFY_NOT_REACHED();
}
complete_current_request(cmdid, status);
update_cqe_head();
}
if (nr_of_processed_cqes) {
update_cq_doorbell();
}
return nr_of_processed_cqes;
}
void NVMeQueue::submit_sqe(NVMeSubmission& sub)
{
SpinlockLocker lock(m_sq_lock);
memcpy(&m_sqe_array[m_sq_tail], &sub, sizeof(NVMeSubmission));
{
u32 temp_sq_tail = m_sq_tail + 1;
if (temp_sq_tail == m_qdepth)
m_sq_tail = 0;
else
m_sq_tail = temp_sq_tail;
}
dbgln_if(NVME_DEBUG, "NVMe: Submission with command identifier {}. SQ_TAIL: {}", sub.cmdid, m_sq_tail);
full_memory_barrier();
update_sq_doorbell();
}
u16 NVMeQueue::submit_sync_sqe(NVMeSubmission& sub)
{
// For now, use the next request cid as a unique command id.
u16 cmd_status;
u16 cid = get_request_cid();
sub.cmdid = cid;
{
SpinlockLocker req_lock(m_request_lock);
if (m_requests.contains(sub.cmdid) && m_requests.get(sub.cmdid).release_value().used)
VERIFY_NOT_REACHED();
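// Register an end_io hook that records the completion status and wakes this thread once the CQE arrives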
m_requests.set(sub.cmdid, { nullptr, true, [this, &cmd_status](u16 status) mutable { cmd_status = status; m_sync_wait_queue.wake_all(); } });
}
submit_sqe(sub);
// FIXME: Only sync submissions (usually used for admin commands) use a WaitQueue based IO. Eventually we need to
// move this logic into the block layer instead of sprinkling it in the driver code.
m_sync_wait_queue.wait_forever("NVMe sync submit"sv);
return cmd_status;
}
void NVMeQueue::read(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count)
{
NVMeSubmission sub {};
sub.op = OP_NVME_READ;
sub.rw.nsid = nsid;
sub.rw.slba = AK::convert_between_host_and_little_endian(index);
// No. of lbas is 0 based
sub.rw.length = AK::convert_between_host_and_little_endian((count - 1) & 0xFFFF);
sub.rw.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_rw_dma_page->paddr().as_ptr()));
sub.cmdid = get_request_cid();
{
SpinlockLocker req_lock(m_request_lock);
if (m_requests.contains(sub.cmdid) && m_requests.get(sub.cmdid).release_value().used)
VERIFY_NOT_REACHED();
m_requests.set(sub.cmdid, { request, true, nullptr });
}
full_memory_barrier();
submit_sqe(sub);
}
void NVMeQueue::write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count)
{
NVMeSubmission sub {};
sub.op = OP_NVME_WRITE;
sub.rw.nsid = nsid;
sub.rw.slba = AK::convert_between_host_and_little_endian(index);
// No. of lbas is 0 based
sub.rw.length = AK::convert_between_host_and_little_endian((count - 1) & 0xFFFF);
sub.rw.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_rw_dma_page->paddr().as_ptr()));
sub.cmdid = get_request_cid();
{
SpinlockLocker req_lock(m_request_lock);
if (m_requests.contains(sub.cmdid) && m_requests.get(sub.cmdid).release_value().used)
VERIFY_NOT_REACHED();
m_requests.set(sub.cmdid, { request, true, nullptr });
}
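// Copy the caller's data into the DMA bounce buffer before the command is posted; the controller will read from this page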
if (auto result = request.read_from_buffer(request.buffer(), m_rw_dma_region->vaddr().as_ptr(), request.buffer_size()); result.is_error()) {
complete_current_request(sub.cmdid, AsyncDeviceRequest::MemoryFault);
return;
}
full_memory_barrier();
submit_sqe(sub);
}
UNMAP_AFTER_INIT NVMeQueue::~NVMeQueue() = default;
}

Kernel/Devices/Storage/NVMe/NVMeQueue.h

@@ -0,0 +1,106 @@
/*
* Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/AtomicRefCounted.h>
#include <AK/HashMap.h>
#include <AK/OwnPtr.h>
#include <AK/Types.h>
#include <Kernel/Bus/PCI/Device.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Interrupts/IRQHandler.h>
#include <Kernel/Library/LockRefPtr.h>
#include <Kernel/Library/NonnullLockRefPtr.h>
#include <Kernel/Locking/Spinlock.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/TypedMapping.h>
namespace Kernel {
struct DoorbellRegister {
u32 sq_tail;
u32 cq_head;
};
enum class QueueType {
Polled,
IRQ
};
class AsyncBlockDeviceRequest;
struct NVMeIO {
RefPtr<AsyncBlockDeviceRequest> request;
bool used = false;
Function<void(u16 status)> end_io_handler;
};
class NVMeController;
class NVMeQueue : public AtomicRefCounted<NVMeQueue> {
public:
static ErrorOr<NonnullLockRefPtr<NVMeQueue>> try_create(NVMeController& device, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs, QueueType queue_type);
bool is_admin_queue() { return m_admin_queue; };
u16 submit_sync_sqe(NVMeSubmission&);
void read(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count);
void write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count);
virtual void submit_sqe(NVMeSubmission&);
virtual ~NVMeQueue();
protected:
u32 process_cq();
void update_sq_doorbell()
{
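// Publishing the new tail tells the controller that fresh submission entries are ready to be fetched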
m_db_regs->sq_tail = m_sq_tail;
}
NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);
[[nodiscard]] u32 get_request_cid()
{
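// Hand out command identifiers round-robin over [0, m_qdepth); the CAS loop keeps concurrent submitters from reusing the same cid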
u32 expected_tag = m_tag.load(AK::memory_order_acquire);
for (;;) {
u32 cid = expected_tag + 1;
if (cid == m_qdepth)
cid = 0;
if (m_tag.compare_exchange_strong(expected_tag, cid, AK::memory_order_acquire))
return cid;
}
}
private:
bool cqe_available();
void update_cqe_head();
virtual void complete_current_request(u16 cmdid, u16 status) = 0;
void update_cq_doorbell()
{
m_db_regs->cq_head = m_cq_head;
}
protected:
Spinlock<LockRank::Interrupts> m_cq_lock {};
HashMap<u16, NVMeIO> m_requests;
NonnullOwnPtr<Memory::Region> m_rw_dma_region;
Spinlock<LockRank::None> m_request_lock {};
private:
u16 m_qid {};
u8 m_cq_valid_phase { 1 };
u16 m_sq_tail {};
u16 m_cq_head {};
bool m_admin_queue { false };
u32 m_qdepth {};
Atomic<u32> m_tag { 0 }; // used for the cid in a submission queue entry
Spinlock<LockRank::Interrupts> m_sq_lock {};
OwnPtr<Memory::Region> m_cq_dma_region;
Span<NVMeSubmission> m_sqe_array;
OwnPtr<Memory::Region> m_sq_dma_region;
Span<NVMeCompletion> m_cqe_array;
WaitQueue m_sync_wait_queue;
Memory::TypedMapping<DoorbellRegister volatile> m_db_regs;
NonnullRefPtr<Memory::PhysicalPage const> const m_rw_dma_page;
};
}