Kernel: Move the Storage directory to be a new directory under Devices
The Storage subsystem, like the Audio and HID subsystems, exposes Unix device files (for example, in the /dev directory). To keep the repository consistent, the Storage subsystem should reside in the Kernel/Devices directory, like the other two subsystems mentioned.
Parent commit: f3a58f3a5a
This commit: 500b7b08d6
59 changed files with 133 additions and 133 deletions
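As an illustration of what the move means for code that includes these headers (the pre-move path is an assumption based on the commit title; it is not shown in this diff):

    // Hypothetical include affected by this move:
    // before: #include <Kernel/Storage/NVMe/NVMeController.h>
    // after:  #include <Kernel/Devices/Storage/NVMe/NVMeController.h>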
Kernel/Devices/Storage/NVMe/NVMeController.cpp (new file, 360 lines)
@@ -0,0 +1,360 @@
/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 * Copyright (c) 2022, the SerenityOS developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/Format.h>
#include <AK/Types.h>
#include <Kernel/Arch/Delay.h>
#include <Kernel/Arch/Interrupts.h>
#include <Kernel/Arch/SafeMem.h>
#include <Kernel/Bus/PCI/API.h>
#include <Kernel/CommandLine.h>
#include <Kernel/Devices/Device.h>
#include <Kernel/Devices/Storage/NVMe/NVMeController.h>
#include <Kernel/Devices/Storage/StorageManagement.h>
#include <Kernel/Library/LockRefPtr.h>
#include <Kernel/Sections.h>

namespace Kernel {

UNMAP_AFTER_INIT ErrorOr<NonnullRefPtr<NVMeController>> NVMeController::try_initialize(Kernel::PCI::DeviceIdentifier const& device_identifier, bool is_queue_polled)
{
    auto controller = TRY(adopt_nonnull_ref_or_enomem(new NVMeController(device_identifier, StorageManagement::generate_relative_nvme_controller_id({}))));
    TRY(controller->initialize(is_queue_polled));
    return controller;
}

UNMAP_AFTER_INIT NVMeController::NVMeController(const PCI::DeviceIdentifier& device_identifier, u32 hardware_relative_controller_id)
    : PCI::Device(const_cast<PCI::DeviceIdentifier&>(device_identifier))
    , StorageController(hardware_relative_controller_id)
{
}

UNMAP_AFTER_INIT ErrorOr<void> NVMeController::initialize(bool is_queue_polled)
{
    // Nr of queues = one queue per core
    auto nr_of_queues = Processor::count();
    auto queue_type = is_queue_polled ? QueueType::Polled : QueueType::IRQ;

    PCI::enable_memory_space(device_identifier());
    PCI::enable_bus_mastering(device_identifier());
    m_bar = PCI::get_BAR0(device_identifier()) & PCI::bar_address_mask;
    static_assert(sizeof(ControllerRegister) == REG_SQ0TDBL_START);
    static_assert(sizeof(NVMeSubmission) == (1 << SQ_WIDTH));

    // Map only until doorbell register for the controller
    // Queues will individually map the doorbell register respectively
    m_controller_regs = TRY(Memory::map_typed_writable<ControllerRegister volatile>(PhysicalAddress(m_bar)));

    auto caps = m_controller_regs->cap;
    m_ready_timeout = Duration::from_milliseconds((CAP_TO(caps) + 1) * 500); // CAP.TO is in 500ms units

    calculate_doorbell_stride();
    // IO queues + 1 admin queue
    m_irq_type = TRY(reserve_irqs(nr_of_queues + 1, true));

    TRY(create_admin_queue(queue_type));
    VERIFY(m_admin_queue_ready == true);

    VERIFY(IO_QUEUE_SIZE < MQES(caps));
    dbgln_if(NVME_DEBUG, "NVMe: IO queue depth is: {}", IO_QUEUE_SIZE);

    // Create an IO queue per core
    for (u32 cpuid = 0; cpuid < nr_of_queues; ++cpuid) {
        // qid is zero is used for admin queue
        TRY(create_io_queue(cpuid + 1, queue_type));
    }
    TRY(identify_and_init_namespaces());
    return {};
}

bool NVMeController::wait_for_ready(bool expected_ready_bit_value)
{
    constexpr size_t one_ms_io_delay = 1000;
    auto wait_iterations = m_ready_timeout.to_milliseconds();

    u32 expected_rdy = expected_ready_bit_value ? 1 : 0;
    while (((m_controller_regs->csts >> CSTS_RDY_BIT) & 0x1) != expected_rdy) {
        microseconds_delay(one_ms_io_delay);

        if (--wait_iterations == 0) {
            if (((m_controller_regs->csts >> CSTS_RDY_BIT) & 0x1) != expected_rdy) {
                dbgln_if(NVME_DEBUG, "NVMEController: CSTS.RDY still not set to {} after {} ms", expected_rdy, m_ready_timeout.to_milliseconds());
                return false;
            }
            break;
        }
    }
    return true;
}

ErrorOr<void> NVMeController::reset_controller()
{
    if ((m_controller_regs->cc & (1 << CC_EN_BIT)) != 0) {
        // If the EN bit is already set, we need to wait
        // until the RDY bit is 1, otherwise the behavior is undefined
        if (!wait_for_ready(true))
            return Error::from_errno(ETIMEDOUT);
    }

    auto cc = m_controller_regs->cc;

    cc = cc & ~(1 << CC_EN_BIT);

    m_controller_regs->cc = cc;

    full_memory_barrier();

    // Wait until the RDY bit is cleared
    if (!wait_for_ready(false))
        return Error::from_errno(ETIMEDOUT);

    return {};
}

ErrorOr<void> NVMeController::start_controller()
{
    if (!(m_controller_regs->cc & (1 << CC_EN_BIT))) {
        // If the EN bit is not already set, we need to wait
        // until the RDY bit is 0, otherwise the behavior is undefined
        if (!wait_for_ready(false))
            return Error::from_errno(ETIMEDOUT);
    }

    auto cc = m_controller_regs->cc;

    cc = cc | (1 << CC_EN_BIT);
    cc = cc | (CQ_WIDTH << CC_IOCQES_BIT);
    cc = cc | (SQ_WIDTH << CC_IOSQES_BIT);

    m_controller_regs->cc = cc;

    full_memory_barrier();

    // Wait until the RDY bit is set
    if (!wait_for_ready(true))
        return Error::from_errno(ETIMEDOUT);

    return {};
}

UNMAP_AFTER_INIT u32 NVMeController::get_admin_q_dept()
{
    u32 aqa = m_controller_regs->aqa;
    // Queue depth is 0 based
    u32 q_depth = min(ACQ_SIZE(aqa), ASQ_SIZE(aqa)) + 1;
    dbgln_if(NVME_DEBUG, "NVMe: Admin queue depth is {}", q_depth);
    return q_depth;
}

UNMAP_AFTER_INIT ErrorOr<void> NVMeController::identify_and_init_namespaces()
{

    RefPtr<Memory::PhysicalPage> prp_dma_buffer;
    OwnPtr<Memory::Region> prp_dma_region;
    auto namespace_data_struct = TRY(ByteBuffer::create_zeroed(NVMe_IDENTIFY_SIZE));
    u32 active_namespace_list[NVMe_IDENTIFY_SIZE / sizeof(u32)];

    {
        auto buffer = TRY(MM.allocate_dma_buffer_page("Identify PRP"sv, Memory::Region::Access::ReadWrite, prp_dma_buffer));
        prp_dma_region = move(buffer);
    }

    // Get the active namespace
    {
        NVMeSubmission sub {};
        u16 status = 0;
        sub.op = OP_ADMIN_IDENTIFY;
        sub.identify.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
        sub.identify.cns = NVMe_CNS_ID_ACTIVE_NS & 0xff;
        status = submit_admin_command(sub, true);
        if (status) {
            dmesgln_pci(*this, "Failed to identify active namespace command");
            return EFAULT;
        }
        if (void* fault_at; !safe_memcpy(active_namespace_list, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
            return EFAULT;
        }
    }
    // Get the NAMESPACE attributes
    {
        NVMeSubmission sub {};
        IdentifyNamespace id_ns {};
        u16 status = 0;
        for (auto nsid : active_namespace_list) {
            memset(prp_dma_region->vaddr().as_ptr(), 0, NVMe_IDENTIFY_SIZE);
            // Invalid NS
            if (nsid == 0)
                break;
            sub.op = OP_ADMIN_IDENTIFY;
            sub.identify.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
            sub.identify.cns = NVMe_CNS_ID_NS & 0xff;
            sub.identify.nsid = nsid;
            status = submit_admin_command(sub, true);
            if (status) {
                dmesgln_pci(*this, "Failed identify namespace with nsid {}", nsid);
                return EFAULT;
            }
            static_assert(sizeof(IdentifyNamespace) == NVMe_IDENTIFY_SIZE);
            if (void* fault_at; !safe_memcpy(&id_ns, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
                return EFAULT;
            }
            auto val = get_ns_features(id_ns);
            auto block_counts = val.get<0>();
            auto block_size = 1 << val.get<1>();

            dbgln_if(NVME_DEBUG, "NVMe: Block count is {} and Block size is {}", block_counts, block_size);

            m_namespaces.append(TRY(NVMeNameSpace::try_create(*this, m_queues, nsid, block_counts, block_size)));
            m_device_count++;
            dbgln_if(NVME_DEBUG, "NVMe: Initialized namespace with NSID: {}", nsid);
        }
    }
    return {};
}

UNMAP_AFTER_INIT Tuple<u64, u8> NVMeController::get_ns_features(IdentifyNamespace& identify_data_struct)
{
    auto flbas = identify_data_struct.flbas & FLBA_SIZE_MASK;
    auto namespace_size = identify_data_struct.nsze;
    auto lba_format = identify_data_struct.lbaf[flbas];

    auto lba_size = (lba_format & LBA_SIZE_MASK) >> 16;
    return Tuple<u64, u8>(namespace_size, lba_size);
}

LockRefPtr<StorageDevice> NVMeController::device(u32 index) const
{
    return m_namespaces.at(index);
}

size_t NVMeController::devices_count() const
{
    return m_device_count;
}

ErrorOr<void> NVMeController::reset()
{
    TRY(reset_controller());
    TRY(start_controller());
    return {};
}

ErrorOr<void> NVMeController::shutdown()
{
    return Error::from_errno(ENOTIMPL);
}

void NVMeController::complete_current_request([[maybe_unused]] AsyncDeviceRequest::RequestResult result)
{
    VERIFY_NOT_REACHED();
}

UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_admin_queue(QueueType queue_type)
{
    auto qdepth = get_admin_q_dept();
    OwnPtr<Memory::Region> cq_dma_region;
    Vector<NonnullRefPtr<Memory::PhysicalPage>> cq_dma_pages;
    OwnPtr<Memory::Region> sq_dma_region;
    Vector<NonnullRefPtr<Memory::PhysicalPage>> sq_dma_pages;
    auto cq_size = round_up_to_power_of_two(CQ_SIZE(qdepth), 4096);
    auto sq_size = round_up_to_power_of_two(SQ_SIZE(qdepth), 4096);
    auto maybe_error = reset_controller();
    if (maybe_error.is_error()) {
        dmesgln_pci(*this, "Failed to reset the NVMe controller");
        return maybe_error;
    }
    {
        auto buffer = TRY(MM.allocate_dma_buffer_pages(cq_size, "Admin CQ queue"sv, Memory::Region::Access::ReadWrite, cq_dma_pages));
        cq_dma_region = move(buffer);
    }

    // Phase bit is important to determine completion, so zero out the space
    // so that we don't get any garbage phase bit value
    memset(cq_dma_region->vaddr().as_ptr(), 0, cq_size);

    {
        auto buffer = TRY(MM.allocate_dma_buffer_pages(sq_size, "Admin SQ queue"sv, Memory::Region::Access::ReadWrite, sq_dma_pages));
        sq_dma_region = move(buffer);
    }
    auto doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister volatile>(PhysicalAddress(m_bar + REG_SQ0TDBL_START)));

    m_controller_regs->acq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first()->paddr().as_ptr()));
    m_controller_regs->asq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first()->paddr().as_ptr()));

    auto irq = TRY(allocate_irq(0)); // Admin queue always uses the 0th index when using MSIx

    maybe_error = start_controller();
    if (maybe_error.is_error()) {
        dmesgln_pci(*this, "Failed to restart the NVMe controller");
        return maybe_error;
    }
    set_admin_queue_ready_flag();
    m_admin_queue = TRY(NVMeQueue::try_create(*this, 0, irq, qdepth, move(cq_dma_region), move(sq_dma_region), move(doorbell_regs), queue_type));

    dbgln_if(NVME_DEBUG, "NVMe: Admin queue created");
    return {};
}

UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_io_queue(u8 qid, QueueType queue_type)
{
    OwnPtr<Memory::Region> cq_dma_region;
    Vector<NonnullRefPtr<Memory::PhysicalPage>> cq_dma_pages;
    OwnPtr<Memory::Region> sq_dma_region;
    Vector<NonnullRefPtr<Memory::PhysicalPage>> sq_dma_pages;
    auto cq_size = round_up_to_power_of_two(CQ_SIZE(IO_QUEUE_SIZE), 4096);
    auto sq_size = round_up_to_power_of_two(SQ_SIZE(IO_QUEUE_SIZE), 4096);

    {
        auto buffer = TRY(MM.allocate_dma_buffer_pages(cq_size, "IO CQ queue"sv, Memory::Region::Access::ReadWrite, cq_dma_pages));
        cq_dma_region = move(buffer);
    }

    // Phase bit is important to determine completion, so zero out the space
    // so that we don't get any garbage phase bit value
    memset(cq_dma_region->vaddr().as_ptr(), 0, cq_size);

    {
        auto buffer = TRY(MM.allocate_dma_buffer_pages(sq_size, "IO SQ queue"sv, Memory::Region::Access::ReadWrite, sq_dma_pages));
        sq_dma_region = move(buffer);
    }

    {
        NVMeSubmission sub {};
        sub.op = OP_ADMIN_CREATE_COMPLETION_QUEUE;
        sub.create_cq.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first()->paddr().as_ptr()));
        sub.create_cq.cqid = qid;
        // The queue size is 0 based
        sub.create_cq.qsize = AK::convert_between_host_and_little_endian(IO_QUEUE_SIZE - 1);
        auto flags = (queue_type == QueueType::IRQ) ? QUEUE_IRQ_ENABLED : QUEUE_IRQ_DISABLED;
        flags |= QUEUE_PHY_CONTIGUOUS;
        // When using MSIx interrupts, qid is used as an index into the interrupt table
        sub.create_cq.irq_vector = (m_irq_type == PCI::InterruptType::PIN) ? 0 : qid;
        sub.create_cq.cq_flags = AK::convert_between_host_and_little_endian(flags & 0xFFFF);
        submit_admin_command(sub, true);
    }
    {
        NVMeSubmission sub {};
        sub.op = OP_ADMIN_CREATE_SUBMISSION_QUEUE;
        sub.create_sq.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first()->paddr().as_ptr()));
        sub.create_sq.sqid = qid;
        // The queue size is 0 based
        sub.create_sq.qsize = AK::convert_between_host_and_little_endian(IO_QUEUE_SIZE - 1);
        auto flags = QUEUE_PHY_CONTIGUOUS;
        sub.create_sq.cqid = qid;
        sub.create_sq.sq_flags = AK::convert_between_host_and_little_endian(flags);
        submit_admin_command(sub, true);
    }

    auto queue_doorbell_offset = REG_SQ0TDBL_START + ((2 * qid) * (4 << m_dbl_stride));
    auto doorbell_regs = TRY(Memory::map_typed_writable<DoorbellRegister volatile>(PhysicalAddress(m_bar + queue_doorbell_offset)));
    auto irq = TRY(allocate_irq(qid));

    m_queues.append(TRY(NVMeQueue::try_create(*this, qid, irq, IO_QUEUE_SIZE, move(cq_dma_region), move(sq_dma_region), move(doorbell_regs), queue_type)));
    dbgln_if(NVME_DEBUG, "NVMe: Created IO Queue with QID{}", m_queues.size());
    return {};
}
}

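A quick worked example of the doorbell arithmetic in create_io_queue() above, assuming a doorbell stride of 0 (m_dbl_stride == 0, that is, 4-byte spacing); the concrete numbers are illustrative and not part of the commit:

    // qid = 1: REG_SQ0TDBL_START + (2 * 1) * (4 << 0) = 0x1000 + 8  = 0x1008
    // qid = 2: REG_SQ0TDBL_START + (2 * 2) * (4 << 0) = 0x1000 + 16 = 0x1010
    // The admin queue (qid 0) maps its doorbell pair at REG_SQ0TDBL_START itself.
    static_assert(0x1000 + ((2 * 1) * (4 << 0)) == 0x1008);
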
Kernel/Devices/Storage/NVMe/NVMeController.h (new file, 84 lines)
@@ -0,0 +1,84 @@
/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <AK/OwnPtr.h>
#include <AK/Time.h>
#include <AK/Tuple.h>
#include <AK/Types.h>
#include <Kernel/Bus/PCI/Device.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Devices/Storage/NVMe/NVMeNameSpace.h>
#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
#include <Kernel/Devices/Storage/StorageController.h>
#include <Kernel/Library/LockRefPtr.h>
#include <Kernel/Library/NonnullLockRefPtr.h>
#include <Kernel/Locking/Spinlock.h>
#include <Kernel/Memory/TypedMapping.h>

namespace Kernel {

class NVMeController : public PCI::Device
    , public StorageController {
public:
    static ErrorOr<NonnullRefPtr<NVMeController>> try_initialize(PCI::DeviceIdentifier const&, bool is_queue_polled);
    ErrorOr<void> initialize(bool is_queue_polled);
    LockRefPtr<StorageDevice> device(u32 index) const override;
    size_t devices_count() const override;
    virtual StringView device_name() const override { return "NVMeController"sv; }

protected:
    ErrorOr<void> reset() override;
    ErrorOr<void> shutdown() override;
    void complete_current_request(AsyncDeviceRequest::RequestResult result) override;

public:
    ErrorOr<void> reset_controller();
    ErrorOr<void> start_controller();
    u32 get_admin_q_dept();

    u16 submit_admin_command(NVMeSubmission& sub, bool sync = false)
    {
        // First queue is always the admin queue
        if (sync) {
            return m_admin_queue->submit_sync_sqe(sub);
        }
        m_admin_queue->submit_sqe(sub);
        return 0;
    }

    bool is_admin_queue_ready() { return m_admin_queue_ready; };
    void set_admin_queue_ready_flag() { m_admin_queue_ready = true; };

private:
    NVMeController(PCI::DeviceIdentifier const&, u32 hardware_relative_controller_id);

    ErrorOr<void> identify_and_init_namespaces();
    Tuple<u64, u8> get_ns_features(IdentifyNamespace& identify_data_struct);
    ErrorOr<void> create_admin_queue(QueueType queue_type);
    ErrorOr<void> create_io_queue(u8 qid, QueueType queue_type);
    void calculate_doorbell_stride()
    {
        m_dbl_stride = (m_controller_regs->cap >> CAP_DBL_SHIFT) & CAP_DBL_MASK;
    }
    bool wait_for_ready(bool);

private:
    LockRefPtr<NVMeQueue> m_admin_queue;
    Vector<NonnullLockRefPtr<NVMeQueue>> m_queues;
    Vector<NonnullLockRefPtr<NVMeNameSpace>> m_namespaces;
    Memory::TypedMapping<ControllerRegister volatile> m_controller_regs;
    bool m_admin_queue_ready { false };
    size_t m_device_count { 0 };
    AK::Duration m_ready_timeout;
    u32 m_bar { 0 };
    u8 m_dbl_stride { 0 };
    PCI::InterruptType m_irq_type;
    QueueType m_queue_type { QueueType::IRQ };
    static Atomic<u8> s_controller_id;
};
}

Kernel/Devices/Storage/NVMe/NVMeDefinitions.h (new file, 216 lines)
@@ -0,0 +1,216 @@
/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <AK/Endian.h>
#include <AK/Types.h>

struct ControllerRegister {
    u64 cap;
    u32 vs;
    u32 intms;
    u32 intmc;
    u32 cc;
    u32 rsvd1;
    u32 csts;
    u32 nssr;
    u32 aqa;
    u64 asq;
    u64 acq;
    u64 rsvd2[505];
};

struct IdentifyNamespace {
    u64 nsze;
    u64 ncap;
    u8 rsdv1[10];
    u8 flbas;
    u8 rsvd2[100];
    u32 lbaf[16];
    u64 rsvd3[488];
};

// DOORBELL
static constexpr u32 REG_SQ0TDBL_START = 0x1000;
static constexpr u32 REG_SQ0TDBL_END = 0x1003;
static constexpr u8 DBL_REG_SIZE = 8;
// CAP
static constexpr u8 CAP_DBL_SHIFT = 32;
static constexpr u8 CAP_DBL_MASK = 0xf;
static constexpr u8 CAP_TO_SHIFT = 24;
static constexpr u64 CAP_TO_MASK = 0xff << CAP_TO_SHIFT;
static constexpr u16 MQES(u64 cap)
{
    return (cap & 0xffff) + 1;
}

static constexpr u32 CAP_TO(u64 cap)
{
    return (cap & CAP_TO_MASK) >> CAP_TO_SHIFT;
}

// CC – Controller Configuration
static constexpr u8 CC_EN_BIT = 0x0;
static constexpr u8 CSTS_RDY_BIT = 0x0;
static constexpr u8 CSTS_SHST_SHIFT = 2;
static constexpr u32 CSTS_SHST_MASK = 0x3 << CSTS_SHST_SHIFT;
static constexpr u8 CC_IOSQES_BIT = 16;
static constexpr u8 CC_IOCQES_BIT = 20;

static constexpr u32 CSTS_SHST(u32 x)
{
    return (x & CSTS_SHST_MASK) >> CSTS_SHST_SHIFT;
}

static constexpr u16 CC_AQA_MASK = (0xfff);
static constexpr u16 ACQ_SIZE(u32 x)
{
    return (x >> 16) & CC_AQA_MASK;
}
static constexpr u16 ASQ_SIZE(u32 x)
{
    return x & CC_AQA_MASK;
}
static constexpr u8 CQ_WIDTH = 4; // CQ is 16 bytes(2^4) in size.
static constexpr u8 SQ_WIDTH = 6; // SQ size is 64 bytes(2^6) in size.
static constexpr u16 CQ_SIZE(u16 q_depth)
{
    return q_depth << CQ_WIDTH;
}
static constexpr u16 SQ_SIZE(u16 q_depth)
{
    return q_depth << SQ_WIDTH;
}
static constexpr u8 PHASE_TAG(u16 x)
{
    return x & 0x1;
}
static constexpr u16 CQ_STATUS_FIELD_MASK = 0xfffe;
static constexpr u16 CQ_STATUS_FIELD(u16 x)
{
    return (x & CQ_STATUS_FIELD_MASK) >> 1;
}

static constexpr u16 IO_QUEUE_SIZE = 64; // TODO:Need to be configurable

// IDENTIFY
static constexpr u16 NVMe_IDENTIFY_SIZE = 4096;
static constexpr u8 NVMe_CNS_ID_ACTIVE_NS = 0x2;
static constexpr u8 NVMe_CNS_ID_NS = 0x0;
static constexpr u8 FLBA_SIZE_INDEX = 26;
static constexpr u8 FLBA_SIZE_MASK = 0xf;
static constexpr u8 LBA_FORMAT_SUPPORT_INDEX = 128;
static constexpr u32 LBA_SIZE_MASK = 0x00ff0000;

// OPCODES
// ADMIN COMMAND SET
enum AdminCommandOpCode {
    OP_ADMIN_CREATE_COMPLETION_QUEUE = 0x5,
    OP_ADMIN_CREATE_SUBMISSION_QUEUE = 0x1,
    OP_ADMIN_IDENTIFY = 0x6,
};

// IO opcodes
enum IOCommandOpcode {
    OP_NVME_WRITE = 0x1,
    OP_NVME_READ = 0x2
};

// FLAGS
static constexpr u8 QUEUE_PHY_CONTIGUOUS = (1 << 0);
static constexpr u8 QUEUE_IRQ_ENABLED = (1 << 1);
static constexpr u8 QUEUE_IRQ_DISABLED = (0 << 1);

struct [[gnu::packed]] NVMeCompletion {
    LittleEndian<u32> cmd_spec;
    LittleEndian<u32> res;

    LittleEndian<u16> sq_head; /* how much of this queue may be reclaimed */
    LittleEndian<u16> sq_id;   /* submission queue that generated this entry */

    u16 command_id;           /* of the command which completed */
    LittleEndian<u16> status; /* did the command fail, and if so, why? */
};

struct [[gnu::packed]] DataPtr {
    LittleEndian<u64> prp1;
    LittleEndian<u64> prp2;
};

struct [[gnu::packed]] NVMeGenericCmd {
    LittleEndian<u32> nsid;
    LittleEndian<u64> rsvd;
    LittleEndian<u64> metadata;
    struct DataPtr data_ptr;
    LittleEndian<u32> cdw10;
    LittleEndian<u32> cdw11;
    LittleEndian<u32> cdw12;
    LittleEndian<u32> cdw13;
    LittleEndian<u32> cdw14;
    LittleEndian<u32> cdw15;
};

struct [[gnu::packed]] NVMeRWCmd {
    LittleEndian<u32> nsid;
    LittleEndian<u64> rsvd;
    LittleEndian<u64> metadata;
    struct DataPtr data_ptr;
    LittleEndian<u64> slba;
    LittleEndian<u16> length;
    LittleEndian<u16> control;
    LittleEndian<u32> dsmgmt;
    LittleEndian<u32> reftag;
    LittleEndian<u16> apptag;
    LittleEndian<u16> appmask;
};

struct [[gnu::packed]] NVMeIdentifyCmd {
    LittleEndian<u32> nsid;
    LittleEndian<u64> rsvd1[2];
    struct DataPtr data_ptr;
    u8 cns;
    u8 rsvd2;
    LittleEndian<u16> ctrlid;
    u8 rsvd3[3];
    u8 csi;
    u64 rsvd4[2];
};

struct [[gnu::packed]] NVMeCreateCQCmd {
    u32 rsvd1[5];
    LittleEndian<u64> prp1;
    u64 rsvd2;
    LittleEndian<u16> cqid;
    LittleEndian<u16> qsize;
    LittleEndian<u16> cq_flags;
    LittleEndian<u16> irq_vector;
    u64 rsvd12[2];
};

struct [[gnu::packed]] NVMeCreateSQCmd {
    u32 rsvd1[5];
    LittleEndian<u64> prp1;
    u64 rsvd2;
    LittleEndian<u16> sqid;
    LittleEndian<u16> qsize;
    LittleEndian<u16> sq_flags;
    LittleEndian<u16> cqid;
    u64 rsvd12[2];
};

struct [[gnu::packed]] NVMeSubmission {
    u8 op;
    u8 flags;
    LittleEndian<u16> cmdid;
    union [[gnu::packed]] {
        NVMeGenericCmd generic;
        NVMeIdentifyCmd identify;
        NVMeRWCmd rw;
        NVMeCreateCQCmd create_cq;
        NVMeCreateSQCmd create_sq;
    };
};

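A sanity check of the queue sizing constants above (added here for illustration, not part of the commit):

    // With IO_QUEUE_SIZE = 64, SQ_WIDTH = 6 (64-byte SQEs) and CQ_WIDTH = 4 (16-byte CQEs):
    //   SQ_SIZE(64) = 64 << 6 = 4096 bytes, exactly one 4 KiB page
    //   CQ_SIZE(64) = 64 << 4 = 1024 bytes, rounded up to 4096 by create_io_queue()
    static_assert(SQ_SIZE(IO_QUEUE_SIZE) == 4096);
    static_assert(CQ_SIZE(IO_QUEUE_SIZE) == 1024);
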
Kernel/Devices/Storage/NVMe/NVMeInterruptQueue.cpp (new file, 93 lines)
@@ -0,0 +1,93 @@
/*
 * Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/Devices/BlockDevice.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Devices/Storage/NVMe/NVMeInterruptQueue.h>
#include <Kernel/WorkQueue.h>

namespace Kernel {

ErrorOr<NonnullLockRefPtr<NVMeInterruptQueue>> NVMeInterruptQueue::try_create(PCI::Device& device, NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
{
    auto queue = TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) NVMeInterruptQueue(device, move(rw_dma_region), rw_dma_page, qid, irq, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs))));
    queue->initialize_interrupt_queue();
    return queue;
}

UNMAP_AFTER_INIT NVMeInterruptQueue::NVMeInterruptQueue(PCI::Device& device, NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
    : NVMeQueue(move(rw_dma_region), rw_dma_page, qid, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs))
    , PCIIRQHandler(device, irq)
{
}

void NVMeInterruptQueue::initialize_interrupt_queue()
{
    enable_irq();
}

bool NVMeInterruptQueue::handle_irq(RegisterState const&)
{
    SpinlockLocker lock(m_request_lock);
    return process_cq() ? true : false;
}

void NVMeInterruptQueue::submit_sqe(NVMeSubmission& sub)
{
    NVMeQueue::submit_sqe(sub);
}

void NVMeInterruptQueue::complete_current_request(u16 cmdid, u16 status)
{
    auto work_item_creation_result = g_io_work->try_queue([this, cmdid, status]() {
        SpinlockLocker lock(m_request_lock);
        auto& request_pdu = m_requests.get(cmdid).release_value();
        auto current_request = request_pdu.request;
        AsyncDeviceRequest::RequestResult req_result = AsyncDeviceRequest::Success;

        ScopeGuard guard = [req_result, status, &request_pdu, &lock] {
            // FIXME: We should unlock at the end of this function to make sure no new requests is inserted
            // before we complete the request and calling end_io_handler but that results in a deadlock
            // For now this is avoided by asserting the `used` field while inserting.
            lock.unlock();
            if (request_pdu.request)
                request_pdu.request->complete(req_result);
            if (request_pdu.end_io_handler)
                request_pdu.end_io_handler(status);
            request_pdu.used = false;
        };

        // There can be submission without any request associated with it such as with
        // admin queue commands during init. If there is no request, we are done
        if (!current_request)
            return;

        if (status) {
            req_result = AsyncBlockDeviceRequest::Failure;
            return;
        }

        if (current_request->request_type() == AsyncBlockDeviceRequest::RequestType::Read) {
            if (auto result = current_request->write_to_buffer(current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), current_request->buffer_size()); result.is_error()) {
                req_result = AsyncBlockDeviceRequest::MemoryFault;
                return;
            }
        }
        return;
    });

    if (work_item_creation_result.is_error()) {
        SpinlockLocker lock(m_request_lock);
        auto& request_pdu = m_requests.get(cmdid).release_value();
        auto current_request = request_pdu.request;

        current_request->complete(AsyncDeviceRequest::OutOfMemory);
        if (request_pdu.end_io_handler)
            request_pdu.end_io_handler(status);
        request_pdu.used = false;
    }
}
}

Kernel/Devices/Storage/NVMe/NVMeInterruptQueue.h (new file, 30 lines)
@@ -0,0 +1,30 @@
/*
 * Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
#include <Kernel/Interrupts/PCIIRQHandler.h>

namespace Kernel {

class NVMeInterruptQueue : public NVMeQueue
    , public PCIIRQHandler {
public:
    static ErrorOr<NonnullLockRefPtr<NVMeInterruptQueue>> try_create(PCI::Device& device, NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);
    void submit_sqe(NVMeSubmission& submission) override;
    virtual ~NVMeInterruptQueue() override {};
    virtual StringView purpose() const override { return "NVMe"sv; };
    void initialize_interrupt_queue();

protected:
    NVMeInterruptQueue(PCI::Device& device, NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);

private:
    virtual void complete_current_request(u16 cmdid, u16 status) override;
    bool handle_irq(RegisterState const&) override;
};
}

Kernel/Devices/Storage/NVMe/NVMeNameSpace.cpp (new file, 42 lines)
@@ -0,0 +1,42 @@
/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/NonnullOwnPtr.h>
#include <Kernel/Devices/DeviceManagement.h>
#include <Kernel/Devices/Storage/NVMe/NVMeController.h>
#include <Kernel/Devices/Storage/NVMe/NVMeNameSpace.h>
#include <Kernel/Devices/Storage/StorageManagement.h>

namespace Kernel {

UNMAP_AFTER_INIT ErrorOr<NonnullLockRefPtr<NVMeNameSpace>> NVMeNameSpace::try_create(NVMeController const& controller, Vector<NonnullLockRefPtr<NVMeQueue>> queues, u16 nsid, size_t storage_size, size_t lba_size)
{
    auto device = TRY(DeviceManagement::try_create_device<NVMeNameSpace>(StorageDevice::LUNAddress { controller.controller_id(), nsid, 0 }, controller.hardware_relative_controller_id(), move(queues), storage_size, lba_size, nsid));
    return device;
}

UNMAP_AFTER_INIT NVMeNameSpace::NVMeNameSpace(LUNAddress logical_unit_number_address, u32 hardware_relative_controller_id, Vector<NonnullLockRefPtr<NVMeQueue>> queues, size_t max_addresable_block, size_t lba_size, u16 nsid)
    : StorageDevice(logical_unit_number_address, hardware_relative_controller_id, lba_size, max_addresable_block)
    , m_nsid(nsid)
    , m_queues(move(queues))
{
}

void NVMeNameSpace::start_request(AsyncBlockDeviceRequest& request)
{
    auto index = Processor::current_id();
    auto& queue = m_queues.at(index);
    // TODO: For now we support only IO transfers of size PAGE_SIZE (Going along with the current constraint in the block layer)
    // Eventually remove this constraint by using the PRP2 field in the submission struct and remove block layer constraint for NVMe driver.
    VERIFY(request.block_count() <= (PAGE_SIZE / block_size()));

    if (request.request_type() == AsyncBlockDeviceRequest::Read) {
        queue->read(request, m_nsid, request.block_index(), request.block_count());
    } else {
        queue->write(request, m_nsid, request.block_index(), request.block_count());
    }
}
}

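Note on start_request() above: it indexes m_queues with Processor::current_id(), which works because NVMeController::initialize() creates one IO queue per core. A small illustration (the core count is assumed, not taken from the commit):

    // Assumed 4-core system: create_io_queue() is called with qid 1..4,
    // so m_queues holds 4 entries (indices 0..3).
    // On CPU 2, Processor::current_id() == 2, so m_queues.at(2) is used,
    // i.e. the queue that was created with qid 3.
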
Kernel/Devices/Storage/NVMe/NVMeNameSpace.h (new file, 38 lines)
@@ -0,0 +1,38 @@
/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <AK/OwnPtr.h>
#include <AK/Types.h>
#include <AK/kmalloc.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
#include <Kernel/Devices/Storage/StorageDevice.h>
#include <Kernel/Library/LockRefPtr.h>
#include <Kernel/Library/NonnullLockRefPtr.h>
#include <Kernel/Locking/Spinlock.h>

namespace Kernel {

class NVMeController;
class NVMeNameSpace : public StorageDevice {
    friend class DeviceManagement;

public:
    static ErrorOr<NonnullLockRefPtr<NVMeNameSpace>> try_create(NVMeController const&, Vector<NonnullLockRefPtr<NVMeQueue>> queues, u16 nsid, size_t storage_size, size_t lba_size);

    CommandSet command_set() const override { return CommandSet::NVMe; };
    void start_request(AsyncBlockDeviceRequest& request) override;

private:
    NVMeNameSpace(LUNAddress, u32 hardware_relative_controller_id, Vector<NonnullLockRefPtr<NVMeQueue>> queues, size_t storage_size, size_t lba_size, u16 nsid);

    u16 m_nsid;
    Vector<NonnullLockRefPtr<NVMeQueue>> m_queues;
};

}

Kernel/Devices/Storage/NVMe/NVMePollQueue.cpp (new file, 67 lines)
@@ -0,0 +1,67 @@
/*
 * Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/Arch/Delay.h>
#include <Kernel/Devices/BlockDevice.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Devices/Storage/NVMe/NVMePollQueue.h>

namespace Kernel {

ErrorOr<NonnullLockRefPtr<NVMePollQueue>> NVMePollQueue::try_create(NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
{
    return TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) NVMePollQueue(move(rw_dma_region), rw_dma_page, qid, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs))));
}

UNMAP_AFTER_INIT NVMePollQueue::NVMePollQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
    : NVMeQueue(move(rw_dma_region), rw_dma_page, qid, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs))
{
}

void NVMePollQueue::submit_sqe(NVMeSubmission& sub)
{
    NVMeQueue::submit_sqe(sub);
    SpinlockLocker lock_cq(m_cq_lock);
    while (!process_cq()) {
        microseconds_delay(1);
    }
}

void NVMePollQueue::complete_current_request(u16 cmdid, u16 status)
{
    SpinlockLocker lock(m_request_lock);
    auto& request_pdu = m_requests.get(cmdid).release_value();
    auto current_request = request_pdu.request;
    AsyncDeviceRequest::RequestResult req_result = AsyncDeviceRequest::Success;

    ScopeGuard guard = [req_result, status, &request_pdu] {
        if (request_pdu.request)
            request_pdu.request->complete(req_result);
        if (request_pdu.end_io_handler)
            request_pdu.end_io_handler(status);
        request_pdu.used = false;
    };

    // There can be submission without any request associated with it such as with
    // admin queue commands during init. If there is no request, we are done
    if (!current_request)
        return;

    if (status) {
        req_result = AsyncBlockDeviceRequest::Failure;
        return;
    }

    if (current_request->request_type() == AsyncBlockDeviceRequest::RequestType::Read) {
        if (auto result = current_request->write_to_buffer(current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), current_request->buffer_size()); result.is_error()) {
            req_result = AsyncBlockDeviceRequest::MemoryFault;
            return;
        }
    }

    return;
}
}

Kernel/Devices/Storage/NVMe/NVMePollQueue.h (new file, 25 lines)
@@ -0,0 +1,25 @@
/*
 * Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>

namespace Kernel {

class NVMePollQueue : public NVMeQueue {
public:
    static ErrorOr<NonnullLockRefPtr<NVMePollQueue>> try_create(NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);
    void submit_sqe(NVMeSubmission& submission) override;
    virtual ~NVMePollQueue() override {};

protected:
    NVMePollQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, NonnullRefPtr<Memory::PhysicalPage> rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);

private:
    virtual void complete_current_request(u16 cmdid, u16 status) override;
};
}

Kernel/Devices/Storage/NVMe/NVMeQueue.cpp (new file, 181 lines)
@@ -0,0 +1,181 @@
/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/Arch/Delay.h>
#include <Kernel/Devices/Storage/NVMe/NVMeController.h>
#include <Kernel/Devices/Storage/NVMe/NVMeInterruptQueue.h>
#include <Kernel/Devices/Storage/NVMe/NVMePollQueue.h>
#include <Kernel/Devices/Storage/NVMe/NVMeQueue.h>
#include <Kernel/StdLib.h>

namespace Kernel {
ErrorOr<NonnullLockRefPtr<NVMeQueue>> NVMeQueue::try_create(NVMeController& device, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs, QueueType queue_type)
{
    // Note: Allocate DMA region for RW operation. For now the requests don't exceed more than 4096 bytes (Storage device takes care of it)
    RefPtr<Memory::PhysicalPage> rw_dma_page;
    auto rw_dma_region = TRY(MM.allocate_dma_buffer_page("NVMe Queue Read/Write DMA"sv, Memory::Region::Access::ReadWrite, rw_dma_page));

    if (rw_dma_page.is_null())
        return ENOMEM;

    if (queue_type == QueueType::Polled) {
        auto queue = NVMePollQueue::try_create(move(rw_dma_region), rw_dma_page.release_nonnull(), qid, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs));
        return queue;
    }

    auto queue = NVMeInterruptQueue::try_create(device, move(rw_dma_region), rw_dma_page.release_nonnull(), qid, irq, q_depth, move(cq_dma_region), move(sq_dma_region), move(db_regs));
    return queue;
}

UNMAP_AFTER_INIT NVMeQueue::NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
    : m_rw_dma_region(move(rw_dma_region))
    , m_qid(qid)
    , m_admin_queue(qid == 0)
    , m_qdepth(q_depth)
    , m_cq_dma_region(move(cq_dma_region))
    , m_sq_dma_region(move(sq_dma_region))
    , m_db_regs(move(db_regs))
    , m_rw_dma_page(rw_dma_page)

{
    m_requests.try_ensure_capacity(q_depth).release_value_but_fixme_should_propagate_errors();
    m_sqe_array = { reinterpret_cast<NVMeSubmission*>(m_sq_dma_region->vaddr().as_ptr()), m_qdepth };
    m_cqe_array = { reinterpret_cast<NVMeCompletion*>(m_cq_dma_region->vaddr().as_ptr()), m_qdepth };
}

bool NVMeQueue::cqe_available()
{
    return PHASE_TAG(m_cqe_array[m_cq_head].status) == m_cq_valid_phase;
}

void NVMeQueue::update_cqe_head()
{
    // To prevent overflow, use a temp variable
    u32 temp_cq_head = m_cq_head + 1;
    if (temp_cq_head == m_qdepth) {
        m_cq_head = 0;
        m_cq_valid_phase ^= 1;
    } else {
        m_cq_head = temp_cq_head;
    }
}

u32 NVMeQueue::process_cq()
{
    u32 nr_of_processed_cqes = 0;
    while (cqe_available()) {
        u16 status;
        u16 cmdid;
        ++nr_of_processed_cqes;
        status = CQ_STATUS_FIELD(m_cqe_array[m_cq_head].status);
        cmdid = m_cqe_array[m_cq_head].command_id;
        dbgln_if(NVME_DEBUG, "NVMe: Completion with status {:x} and command identifier {}. CQ_HEAD: {}", status, cmdid, m_cq_head);

        if (!m_requests.contains(cmdid)) {
            dmesgln("Bogus cmd id: {}", cmdid);
            VERIFY_NOT_REACHED();
        }
        complete_current_request(cmdid, status);
        update_cqe_head();
    }
    if (nr_of_processed_cqes) {
        update_cq_doorbell();
    }
    return nr_of_processed_cqes;
}

void NVMeQueue::submit_sqe(NVMeSubmission& sub)
{
    SpinlockLocker lock(m_sq_lock);

    memcpy(&m_sqe_array[m_sq_tail], &sub, sizeof(NVMeSubmission));
    {
        u32 temp_sq_tail = m_sq_tail + 1;
        if (temp_sq_tail == m_qdepth)
            m_sq_tail = 0;
        else
            m_sq_tail = temp_sq_tail;
    }

    dbgln_if(NVME_DEBUG, "NVMe: Submission with command identifier {}. SQ_TAIL: {}", sub.cmdid, m_sq_tail);
    full_memory_barrier();
    update_sq_doorbell();
}

u16 NVMeQueue::submit_sync_sqe(NVMeSubmission& sub)
{
    // For now let's use sq tail as a unique command id.
    u16 cmd_status;
    u16 cid = get_request_cid();
    sub.cmdid = cid;

    {
        SpinlockLocker req_lock(m_request_lock);

        if (m_requests.contains(sub.cmdid) && m_requests.get(sub.cmdid).release_value().used)
            VERIFY_NOT_REACHED();
        m_requests.set(sub.cmdid, { nullptr, true, [this, &cmd_status](u16 status) mutable { cmd_status = status; m_sync_wait_queue.wake_all(); } });
    }
    submit_sqe(sub);

    // FIXME: Only sync submissions (usually used for admin commands) use a WaitQueue based IO. Eventually we need to
    // move this logic into the block layer instead of sprinkling them in the driver code.
    m_sync_wait_queue.wait_forever("NVMe sync submit"sv);
    return cmd_status;
}

void NVMeQueue::read(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count)
{
    NVMeSubmission sub {};
    sub.op = OP_NVME_READ;
    sub.rw.nsid = nsid;
    sub.rw.slba = AK::convert_between_host_and_little_endian(index);
    // No. of lbas is 0 based
    sub.rw.length = AK::convert_between_host_and_little_endian((count - 1) & 0xFFFF);
    sub.rw.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_rw_dma_page->paddr().as_ptr()));
    sub.cmdid = get_request_cid();

    {
        SpinlockLocker req_lock(m_request_lock);
        if (m_requests.contains(sub.cmdid) && m_requests.get(sub.cmdid).release_value().used)
            VERIFY_NOT_REACHED();
        m_requests.set(sub.cmdid, { request, true, nullptr });
    }

    full_memory_barrier();
    submit_sqe(sub);
}

void NVMeQueue::write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count)
{
    NVMeSubmission sub {};

    sub.op = OP_NVME_WRITE;
    sub.rw.nsid = nsid;
    sub.rw.slba = AK::convert_between_host_and_little_endian(index);
    // No. of lbas is 0 based
    sub.rw.length = AK::convert_between_host_and_little_endian((count - 1) & 0xFFFF);
    sub.rw.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_rw_dma_page->paddr().as_ptr()));
    sub.cmdid = get_request_cid();

    {
        SpinlockLocker req_lock(m_request_lock);
        if (m_requests.contains(sub.cmdid) && m_requests.get(sub.cmdid).release_value().used)
            VERIFY_NOT_REACHED();
        m_requests.set(sub.cmdid, { request, true, nullptr });
    }

    if (auto result = request.read_from_buffer(request.buffer(), m_rw_dma_region->vaddr().as_ptr(), request.buffer_size()); result.is_error()) {
        complete_current_request(sub.cmdid, AsyncDeviceRequest::MemoryFault);
        return;
    }

    full_memory_barrier();
    submit_sqe(sub);
}

UNMAP_AFTER_INIT NVMeQueue::~NVMeQueue() = default;
}

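A short sketch of the phase-tag protocol that cqe_available() and update_cqe_head() above implement (illustrative commentary only, not additional code from the commit):

    // The CQ memory is zeroed at creation, and m_cq_valid_phase starts at 1.
    // Pass 1: the controller writes new entries with phase bit 1, so
    //         PHASE_TAG(status) == m_cq_valid_phase marks them as fresh.
    // Wrap:   when m_cq_head wraps back to 0, update_cqe_head() flips
    //         m_cq_valid_phase to 0.
    // Pass 2: new entries now carry phase bit 0, while leftovers from pass 1
    //         (still tagged 1) no longer match and are not re-consumed.
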
Kernel/Devices/Storage/NVMe/NVMeQueue.h (new file, 106 lines)
@@ -0,0 +1,106 @@
/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <AK/AtomicRefCounted.h>
#include <AK/HashMap.h>
#include <AK/OwnPtr.h>
#include <AK/Types.h>
#include <Kernel/Bus/PCI/Device.h>
#include <Kernel/Devices/Storage/NVMe/NVMeDefinitions.h>
#include <Kernel/Interrupts/IRQHandler.h>
#include <Kernel/Library/LockRefPtr.h>
#include <Kernel/Library/NonnullLockRefPtr.h>
#include <Kernel/Locking/Spinlock.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/TypedMapping.h>

namespace Kernel {

struct DoorbellRegister {
    u32 sq_tail;
    u32 cq_head;
};

enum class QueueType {
    Polled,
    IRQ
};

class AsyncBlockDeviceRequest;

struct NVMeIO {
    RefPtr<AsyncBlockDeviceRequest> request;
    bool used = false;
    Function<void(u16 status)> end_io_handler;
};

class NVMeController;
class NVMeQueue : public AtomicRefCounted<NVMeQueue> {
public:
    static ErrorOr<NonnullLockRefPtr<NVMeQueue>> try_create(NVMeController& device, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs, QueueType queue_type);
    bool is_admin_queue() { return m_admin_queue; };
    u16 submit_sync_sqe(NVMeSubmission&);
    void read(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count);
    void write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count);
    virtual void submit_sqe(NVMeSubmission&);
    virtual ~NVMeQueue();

protected:
    u32 process_cq();
    void update_sq_doorbell()
    {
        m_db_regs->sq_tail = m_sq_tail;
    }
    NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, OwnPtr<Memory::Region> sq_dma_region, Memory::TypedMapping<DoorbellRegister volatile> db_regs);

    [[nodiscard]] u32 get_request_cid()
    {
        u32 expected_tag = m_tag.load(AK::memory_order_acquire);

        for (;;) {
            u32 cid = expected_tag + 1;
            if (cid == m_qdepth)
                cid = 0;
            if (m_tag.compare_exchange_strong(expected_tag, cid, AK::memory_order_acquire))
                return cid;
        }
    }

private:
    bool cqe_available();
    void update_cqe_head();
    virtual void complete_current_request(u16 cmdid, u16 status) = 0;
    void update_cq_doorbell()
    {
        m_db_regs->cq_head = m_cq_head;
    }

protected:
    Spinlock<LockRank::Interrupts> m_cq_lock {};
    HashMap<u16, NVMeIO> m_requests;
    NonnullOwnPtr<Memory::Region> m_rw_dma_region;
    Spinlock<LockRank::None> m_request_lock {};

private:
    u16 m_qid {};
    u8 m_cq_valid_phase { 1 };
    u16 m_sq_tail {};
    u16 m_cq_head {};
    bool m_admin_queue { false };
    u32 m_qdepth {};
    Atomic<u32> m_tag { 0 }; // used for the cid in a submission queue entry
    Spinlock<LockRank::Interrupts> m_sq_lock {};
    OwnPtr<Memory::Region> m_cq_dma_region;
    Span<NVMeSubmission> m_sqe_array;
    OwnPtr<Memory::Region> m_sq_dma_region;
    Span<NVMeCompletion> m_cqe_array;
    WaitQueue m_sync_wait_queue;
    Memory::TypedMapping<DoorbellRegister volatile> m_db_regs;
    NonnullRefPtr<Memory::PhysicalPage const> const m_rw_dma_page;
};
}