From 6cdb1f041568076cd8108953772e163b7a8fa085 Mon Sep 17 00:00:00 2001 From: Kirill Nikolaev Date: Sun, 2 Jul 2023 17:39:47 +0200 Subject: [PATCH] Kernel: Add an initial implementation of virtio-net driver It can be exercised by setting SERENITY_ETHERNET_DEVICE_TYPE=virtio-net-pci. --- Kernel/CMakeLists.txt | 1 + Kernel/Memory/RingBuffer.h | 1 + Kernel/Net/NetworkingManagement.cpp | 2 + Kernel/Net/VirtIO/VirtIONetworkAdapter.cpp | 283 +++++++++++++++++++++ Kernel/Net/VirtIO/VirtIONetworkAdapter.h | 60 +++++ 5 files changed, 347 insertions(+) create mode 100644 Kernel/Net/VirtIO/VirtIONetworkAdapter.cpp create mode 100644 Kernel/Net/VirtIO/VirtIONetworkAdapter.h diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt index 4cf3f0344c..a0bc372404 100644 --- a/Kernel/CMakeLists.txt +++ b/Kernel/CMakeLists.txt @@ -250,6 +250,7 @@ set(KERNEL_SOURCES Net/Intel/E1000ENetworkAdapter.cpp Net/Intel/E1000NetworkAdapter.cpp Net/Realtek/RTL8168NetworkAdapter.cpp + Net/VirtIO/VirtIONetworkAdapter.cpp Net/IPv4Socket.cpp Net/LocalSocket.cpp Net/LoopbackAdapter.cpp diff --git a/Kernel/Memory/RingBuffer.h b/Kernel/Memory/RingBuffer.h index 43ea60d6f4..e66c8ca2ca 100644 --- a/Kernel/Memory/RingBuffer.h +++ b/Kernel/Memory/RingBuffer.h @@ -24,6 +24,7 @@ public: Spinlock& lock() { return m_lock; } size_t used_bytes() const { return m_num_used_bytes; } + size_t available_bytes() const { return m_capacity_in_bytes - m_num_used_bytes; } PhysicalAddress start_of_region() const { return m_region->physical_page(0)->paddr(); } VirtualAddress vaddr() const { return m_region->vaddr(); } size_t bytes_till_end() const { return (m_capacity_in_bytes - ((m_start_of_used + m_num_used_bytes) % m_capacity_in_bytes)) % m_capacity_in_bytes; } diff --git a/Kernel/Net/NetworkingManagement.cpp b/Kernel/Net/NetworkingManagement.cpp index 323a953547..b3dc216704 100644 --- a/Kernel/Net/NetworkingManagement.cpp +++ b/Kernel/Net/NetworkingManagement.cpp @@ -15,6 +15,7 @@ #include #include #include +#include 
#include namespace Kernel { @@ -100,6 +101,7 @@ static constexpr PCINetworkDriverInitializer s_initializers[] = { { RTL8168NetworkAdapter::probe, RTL8168NetworkAdapter::create }, { E1000NetworkAdapter::probe, E1000NetworkAdapter::create }, { E1000ENetworkAdapter::probe, E1000ENetworkAdapter::create }, + { VirtIONetworkAdapter::probe, VirtIONetworkAdapter::create }, }; UNMAP_AFTER_INIT ErrorOr> NetworkingManagement::determine_network_device(PCI::DeviceIdentifier const& device_identifier) const diff --git a/Kernel/Net/VirtIO/VirtIONetworkAdapter.cpp b/Kernel/Net/VirtIO/VirtIONetworkAdapter.cpp new file mode 100644 index 0000000000..959c35211f --- /dev/null +++ b/Kernel/Net/VirtIO/VirtIONetworkAdapter.cpp @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2023, Kirill Nikolaev + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +namespace Kernel { + +namespace VirtIO { + +// https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html + +static constexpr u64 VIRTIO_NET_F_CSUM = (1ull << 0); // Device handles packets with partial checksum. +static constexpr u64 VIRTIO_NET_F_GUEST_CSUM = (1ull << 1); // Driver handles packets with partial checksum. +static constexpr u64 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS = (1ull << 2); // Control channel offloads reconfiguration support. +static constexpr u64 VIRTIO_NET_F_MTU = (1ull << 3); // Device maximum MTU reporting is supported. +static constexpr u64 VIRTIO_NET_F_MAC = (1ull << 5); // Device has given MAC address. +static constexpr u64 VIRTIO_NET_F_GUEST_TSO4 = (1ull << 7); // Driver can receive TSOv4. +static constexpr u64 VIRTIO_NET_F_GUEST_TSO6 = (1ull << 8); // Driver can receive TSOv6. +static constexpr u64 VIRTIO_NET_F_GUEST_ECN = (1ull << 9); // Driver can receive TSO with ECN. +static constexpr u64 VIRTIO_NET_F_GUEST_UFO = (1ull << 10); // Driver can receive UFO. +static constexpr u64 VIRTIO_NET_F_HOST_TSO4 = (1ull << 11); // Device can receive TSOv4. 
+static constexpr u64 VIRTIO_NET_F_HOST_TSO6 = (1ull << 12); // Device can receive TSOv6.
+static constexpr u64 VIRTIO_NET_F_HOST_ECN = (1ull << 13); // Device can receive TSO with ECN.
+static constexpr u64 VIRTIO_NET_F_HOST_UFO = (1ull << 14); // Device can receive UFO.
+static constexpr u64 VIRTIO_NET_F_MRG_RXBUF = (1ull << 15); // Driver can merge receive buffers.
+static constexpr u64 VIRTIO_NET_F_STATUS = (1ull << 16); // Configuration status field is available.
+static constexpr u64 VIRTIO_NET_F_CTRL_VQ = (1ull << 17); // Control channel is available.
+static constexpr u64 VIRTIO_NET_F_CTRL_RX = (1ull << 18); // Control channel RX mode support.
+static constexpr u64 VIRTIO_NET_F_CTRL_VLAN = (1ull << 19); // Control channel VLAN filtering.
+static constexpr u64 VIRTIO_NET_F_GUEST_ANNOUNCE = (1ull << 21); // Driver can send gratuitous packets.
+static constexpr u64 VIRTIO_NET_F_MQ = (1ull << 22); // Device supports multiqueue with automatic receive steering.
+static constexpr u64 VIRTIO_NET_F_CTRL_MAC_ADDR = (1ull << 23); // Set MAC address through control channel.
+static constexpr u64 VIRTIO_NET_F_HOST_USO = (1ull << 56); // Device can receive USO packets.
+static constexpr u64 VIRTIO_NET_F_HASH_REPORT = (1ull << 57); // Device can report per-packet hash value and a type of calculated hash.
+static constexpr u64 VIRTIO_NET_F_GUEST_HDRLEN = (1ull << 59); // Driver can provide the exact hdr_len value.
+static constexpr u64 VIRTIO_NET_F_RSS = (1ull << 60); // Device supports RSS with Toeplitz hash calculation
+static constexpr u64 VIRTIO_NET_F_RSC_EXT = (1ull << 61); // Device can process duplicated ACKs and report number of coalesced segments and duplicated ACKs.
+static constexpr u64 VIRTIO_NET_F_STANDBY = (1ull << 62); // Device may act as a standby for a primary device with the same MAC address.
+static constexpr u64 VIRTIO_NET_F_SPEED_DUPLEX = (1ull << 63); // Device reports speed and duplex.
+
+// Bits of the `status` field of the device configuration (see VirtIONetConfig).
+static constexpr u16 VIRTIO_NET_S_LINK_UP = 1;
+static constexpr u16 VIRTIO_NET_S_ANNOUNCE = 2;
+
+// Bits of VirtIONetHdr::flags. They are independent flags, so each one occupies its own bit.
+static constexpr u8 VIRTIO_NET_HDR_F_NEEDS_CSUM = 1;
+static constexpr u8 VIRTIO_NET_HDR_F_DATA_VALID = 2;
+static constexpr u8 VIRTIO_NET_HDR_F_RSC_INFO = 4;
+// Values of VirtIONetHdr::gso_type.
+static constexpr u8 VIRTIO_NET_HDR_GSO_NONE = 0;
+static constexpr u8 VIRTIO_NET_HDR_GSO_TCPV4 = 1;
+static constexpr u8 VIRTIO_NET_HDR_GSO_UDP = 3;
+static constexpr u8 VIRTIO_NET_HDR_GSO_TCPV6 = 4;
+static constexpr u8 VIRTIO_NET_HDR_GSO_UDP_L4 = 5;
+static constexpr u8 VIRTIO_NET_HDR_GSO_ECN = 0x80;
+
+// Layout of the device-specific configuration space. The driver reads fields from it by offsetof().
+struct [[gnu::packed]] VirtIONetConfig {
+    u8 mac[6];
+    LittleEndian<u16> status;
+    LittleEndian<u16> max_virtqueue_pairs;
+    LittleEndian<u16> mtu;
+    LittleEndian<u32> speed;
+    u8 duplex;
+    u8 rss_max_key_size;
+    LittleEndian<u16> rss_max_indirection_table_length;
+    LittleEndian<u32> supported_hash_types;
+};
+
+// Header that precedes every frame in the receive and transmit buffers; `frame` marks where the frame starts.
+struct [[gnu::packed]] VirtIONetHdr {
+    u8 flags;
+    u8 gso_type;
+    LittleEndian<u16> hdr_len;
+    LittleEndian<u16> gso_size;
+    LittleEndian<u16> csum_start;
+    LittleEndian<u16> csum_offset;
+    LittleEndian<u16> num_buffers;
+    u8 frame[0];
+};
+
+}
+
+using namespace VirtIO;
+
+// Virtqueue indices used by this driver.
+static constexpr u16 RECEIVEQ = 0;
+static constexpr u16 TRANSMITQ = 1;
+
+static constexpr size_t MAX_RX_FRAME_SIZE = 1514; // Non-jumbo Ethernet frame limit.
+// A single receive buffer holds one VirtIONetHdr followed by one maximum-size frame.
+static constexpr size_t RX_BUFFER_SIZE = sizeof(VirtIONetHdr) + MAX_RX_FRAME_SIZE;
+// Number of RX_BUFFER_SIZE-sized slots each of the two ring buffers is created with.
+static constexpr u16 MAX_INFLIGHT_PACKETS = 128;
+
+// Returns true only for a PCI device with the VirtIO vendor ID and the VirtIO network adapter device ID.
+UNMAP_AFTER_INIT ErrorOr<bool> VirtIONetworkAdapter::probe(PCI::DeviceIdentifier const& pci_device_identifier)
+{
+    if (pci_device_identifier.hardware_id().vendor_id != PCI::VendorID::VirtIO)
+        return false;
+    if (pci_device_identifier.hardware_id().device_id != PCI::DeviceID::VirtIONetAdapter)
+        return false;
+    return true;
+}
+
+// Derives the interface name from the PCI address and allocates the adapter.
+// Errors from either step are propagated to the caller.
+UNMAP_AFTER_INIT ErrorOr<NonnullRefPtr<NetworkAdapter>> VirtIONetworkAdapter::create(PCI::DeviceIdentifier const& pci_device_identifier)
+{
+    auto interface_name = TRY(NetworkingManagement::generate_interface_name_from_pci_address(pci_device_identifier));
+    return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) VirtIONetworkAdapter(pci_device_identifier, move(interface_name))));
+}
+
+UNMAP_AFTER_INIT VirtIONetworkAdapter::VirtIONetworkAdapter(PCI::DeviceIdentifier const& pci_device_identifier, NonnullOwnPtr<KString> interface_name)
+    : VirtIO::Device(pci_device_identifier)
+    , NetworkAdapter(move(interface_name))
+{
+}
+
+// Allocates the receive and transmit ring buffers, then performs the VirtIO-level initialization.
+UNMAP_AFTER_INIT ErrorOr<void> VirtIONetworkAdapter::initialize(Badge<NetworkingManagement>)
+{
+    m_rx_buffers = TRY(Memory::RingBuffer::try_create("VirtIONetworkAdapter Rx buffer"sv, RX_BUFFER_SIZE * MAX_INFLIGHT_PACKETS));
+    m_tx_buffers = TRY(Memory::RingBuffer::try_create("VirtIONetworkAdapter Tx buffer"sv, RX_BUFFER_SIZE * MAX_INFLIGHT_PACKETS));
+
+    return initialize_virtio_resources();
+}
+
+// Negotiates features, reads the initial device configuration, sets up the two queues and
+// hands the receive buffers to the device. Returns EIO if any of the first three steps fails.
+UNMAP_AFTER_INIT ErrorOr<void> VirtIONetworkAdapter::initialize_virtio_resources()
+{
+    dbgln_if(VIRTIO_DEBUG, "VirtIONetworkAdapter: initialize_virtio_resources");
+    TRY(Device::initialize_virtio_resources());
+    m_device_config = TRY(get_config(VirtIO::ConfigurationType::Device));
+
+    // Only accept the features whose configuration fields handle_device_config_change() reads.
+    bool success = negotiate_features([&](u64 supported_features) {
+        u64 negotiated = 0;
+        if (is_feature_set(supported_features, VIRTIO_NET_F_STATUS))
+            negotiated |= VIRTIO_NET_F_STATUS;
+        if (is_feature_set(supported_features, VIRTIO_NET_F_MAC))
+            negotiated |= VIRTIO_NET_F_MAC;
+        if (is_feature_set(supported_features, VIRTIO_NET_F_SPEED_DUPLEX))
+            negotiated |= VIRTIO_NET_F_SPEED_DUPLEX;
+        if (is_feature_set(supported_features, VIRTIO_NET_F_MTU))
+            negotiated |= VIRTIO_NET_F_MTU;
+        return negotiated;
+    });
+    if (!success)
+        return Error::from_errno(EIO);
+
+    success = handle_device_config_change();
+    if (!success)
+        return Error::from_errno(EIO);
+
+    success = setup_queues(2); // receive & transmit
+    if (!success)
+        return Error::from_errno(EIO);
+
+    finish_init();
+
+    {
+        // Supply receive buffers.
+        auto& rx_queue = get_queue(RECEIVEQ);
+        SpinlockLocker queue_lock(rx_queue.lock());
+        VirtIO::QueueChain chain(rx_queue);
+        while (m_rx_buffers->available_bytes() > RX_BUFFER_SIZE) {
+            // We know that the RingBuffer will not wraparound in this loop. But it's still awkward.
+            auto buffer_start = MUST(m_rx_buffers->reserve_space(RX_BUFFER_SIZE));
+            VERIFY(chain.add_buffer_to_chain(buffer_start, RX_BUFFER_SIZE, VirtIO::BufferType::DeviceWritable));
+            supply_chain_and_notify(RECEIVEQ, chain);
+        }
+    }
+
+    return {};
+}
+
+// Re-reads the configuration fields that belong to the accepted features. Always returns true.
+bool VirtIONetworkAdapter::handle_device_config_change()
+{
+    dbgln_if(VIRTIO_DEBUG, "VirtIONetworkAdapter: handle_device_config_change");
+    read_config_atomic([&]() {
+        if (is_feature_accepted(VIRTIO_NET_F_MAC)) {
+            set_mac_address(MACAddress(
+                config_read8(*m_device_config, 0x0),
+                config_read8(*m_device_config, 0x1),
+                config_read8(*m_device_config, 0x2),
+                config_read8(*m_device_config, 0x3),
+                config_read8(*m_device_config, 0x4),
+                config_read8(*m_device_config, 0x5)));
+        }
+        if (is_feature_accepted(VIRTIO_NET_F_STATUS)) {
+            u16 status = config_read16(*m_device_config, offsetof(VirtIONetConfig, status));
+            m_link_up = (status & VIRTIO_NET_S_LINK_UP) != 0;
+        }
+        if (is_feature_accepted(VIRTIO_NET_F_MTU)) {
+            u16 mtu = config_read16(*m_device_config, offsetof(VirtIONetConfig, mtu));
+            set_mtu(mtu);
+        }
+        if (is_feature_accepted(VIRTIO_NET_F_SPEED_DUPLEX)) {
+            u32 speed = config_read32(*m_device_config, offsetof(VirtIONetConfig, speed));
+            m_link_speed = speed;
+            // `duplex` is a single byte; a wider read would pull in the fields that follow it.
+            u8 duplex = config_read8(*m_device_config, offsetof(VirtIONetConfig, duplex));
+            m_link_duplex = duplex == 0x01;
+        }
+    });
+    return true;
+}
+
+// Receive queue: passes every returned buffer to did_receive() and hands the same chain back to the device.
+// Transmit queue: returns the space of completed buffers to the Tx ring buffer.
+// Any other queue index is only logged.
+void VirtIONetworkAdapter::handle_queue_update(u16 queue_index)
+{
+    dbgln_if(VIRTIO_DEBUG, "VirtIONetworkAdapter: handle_queue_update {}", queue_index);
+
+    if (queue_index == RECEIVEQ) {
+        // FIXME: Disable interrupts while receiving as recommended by the spec.
+        auto& queue = get_queue(RECEIVEQ);
+        SpinlockLocker queue_lock(queue.lock());
+        size_t used;
+        VirtIO::QueueChain popped_chain = queue.pop_used_buffer_chain(used);
+
+        while (!popped_chain.is_empty()) {
+            VERIFY(popped_chain.length() == 1);
+            popped_chain.for_each([&](PhysicalAddress addr, size_t length) {
+                // Translate the physical address of the buffer into its address in the ring buffer mapping.
+                size_t offset = addr.as_ptr() - m_rx_buffers->start_of_region().as_ptr();
+                auto* message = reinterpret_cast<VirtIONetHdr*>(m_rx_buffers->vaddr().offset(offset).as_ptr());
+                // NOTE(review): the frame size is derived from the chain entry's `length`; `used` is never read.
+                // Confirm whether the size should come from `used` instead.
+                did_receive({ message->frame, length - sizeof(VirtIONetHdr) });
+            });
+
+            supply_chain_and_notify(RECEIVEQ, popped_chain);
+            popped_chain = queue.pop_used_buffer_chain(used);
+        }
+    } else if (queue_index == TRANSMITQ) {
+        auto& queue = get_queue(TRANSMITQ);
+        SpinlockLocker queue_lock(queue.lock());
+        SpinlockLocker ringbuffer_lock(m_tx_buffers->lock());
+
+        size_t used;
+        VirtIO::QueueChain popped_chain = queue.pop_used_buffer_chain(used);
+        // The first popped chain is processed before the emptiness check.
+        do {
+            popped_chain.for_each([this](PhysicalAddress address, size_t length) {
+                m_tx_buffers->reclaim_space(address, length);
+            });
+            popped_chain.release_buffer_slots_to_queue();
+            popped_chain = queue.pop_used_buffer_chain(used);
+        } while (!popped_chain.is_empty());
+    } else {
+        dmesgln("VirtIONetworkAdapter: unexpected update for queue {}", queue_index);
+    }
+}
+
+// Copies `length` bytes from `data` into `ring` and appends every copied chunk to `chain` as a
+// device-readable buffer. Returns false as soon as the chain refuses a chunk.
+static bool copy_data_to_chain(VirtIO::QueueChain& chain, Memory::RingBuffer& ring, u8 const* data, size_t length)
+{
+    UserOrKernelBuffer buf = UserOrKernelBuffer::for_kernel_buffer(const_cast<u8*>(data));
+
+    size_t offset = 0;
+    while (offset < length) {
+        PhysicalAddress start_of_chunk;
+        size_t length_of_chunk;
+        VERIFY(ring.copy_data_in(buf, offset, length - offset, start_of_chunk, length_of_chunk));
+        if (!chain.add_buffer_to_chain(start_of_chunk, length_of_chunk, VirtIO::BufferType::DeviceReadable)) {
+            // FIXME: Rewind the RingBuffer.
+            // We are leaving the RingBuffer in an inconsistent state, but interface doesn't allow to undo pushes :(.
+            return false;
+        }
+        offset += length_of_chunk;
+    }
+    return true;
+}
+
+// Queues a zero-initialized VirtIONetHdr followed by `payload` on the transmit queue.
+// The packet is dropped (with a log message) when the Tx ring buffer cannot hold both.
+void VirtIONetworkAdapter::send_raw(ReadonlyBytes payload)
+{
+    dbgln_if(VIRTIO_DEBUG, "VirtIONetworkAdapter: send_raw length={}", payload.size());
+
+    auto& queue = get_queue(TRANSMITQ);
+    SpinlockLocker queue_lock(queue.lock());
+    VirtIO::QueueChain chain(queue);
+
+    SpinlockLocker ringbuffer_lock(m_tx_buffers->lock());
+    if (m_tx_buffers->available_bytes() < sizeof(VirtIONetHdr) + payload.size()) {
+        // We can drop packets that don't fit to apply back pressure on eager senders.
+        dmesgln("VirtIONetworkAdapter: not enough space in the buffer. Dropping packet");
+        return;
+    }
+
+    // FIXME: Handle errors from pushing to the chain and rewind the RingBuffer.
+    VirtIONetHdr hdr {};
+    VERIFY(copy_data_to_chain(chain, *m_tx_buffers, reinterpret_cast<u8 const*>(&hdr), sizeof(hdr)));
+    VERIFY(copy_data_to_chain(chain, *m_tx_buffers, payload.data(), payload.size()));
+
+    supply_chain_and_notify(TRANSMITQ, chain);
+}
+
+}
diff --git a/Kernel/Net/VirtIO/VirtIONetworkAdapter.h b/Kernel/Net/VirtIO/VirtIONetworkAdapter.h
new file mode 100644
index 0000000000..1d7aff6218
--- /dev/null
+++ b/Kernel/Net/VirtIO/VirtIONetworkAdapter.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023, Kirill Nikolaev
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace Kernel {
+
+class VirtIONetworkAdapter
+    : public VirtIO::Device
+    , public NetworkAdapter {
+
+public:
+    static ErrorOr<bool> probe(PCI::DeviceIdentifier const&);
+    static ErrorOr<NonnullRefPtr<NetworkAdapter>> create(PCI::DeviceIdentifier const&);
+    virtual ~VirtIONetworkAdapter() override = default;
+
+    // VirtIO::Device
+    virtual ErrorOr<void> initialize_virtio_resources() override;
+    virtual StringView device_name() const override { return class_name(); }
+
+    // NetworkAdapter
+    virtual StringView class_name() const override { return "VirtIONetworkAdapter"sv; }
+    virtual Type adapter_type() const override { return Type::Ethernet; }
+    virtual ErrorOr<void> initialize(Badge<NetworkingManagement>) override;
+
+    virtual bool link_up() override { return m_link_up; }
+    virtual bool link_full_duplex() override { return m_link_duplex; }
+    virtual i32 link_speed() override { return m_link_speed; }
+
+private:
+    explicit VirtIONetworkAdapter(PCI::DeviceIdentifier const&, NonnullOwnPtr<KString> interface_name);
+
+    // VirtIO::Device
+    virtual bool handle_device_config_change() override;
+    virtual void handle_queue_update(u16 queue_index) override;
+
+    // NetworkAdapter
+    virtual void send_raw(ReadonlyBytes) override;
+
+private:
+    VirtIO::Configuration const* m_device_config { nullptr };
+
+    // FIXME: Make atomic as they are read without sync.
+    // Note that VirtIO::NetworkAdapter may also have the same defect.
+    bool m_link_up { false };
+    i32 m_link_speed { LINKSPEED_INVALID };
+    bool m_link_duplex { false };
+
+    OwnPtr<Memory::RingBuffer> m_rx_buffers;
+    OwnPtr<Memory::RingBuffer> m_tx_buffers;
+};
+
+}