diff --git a/Kernel/Net/IPv4Socket.cpp b/Kernel/Net/IPv4Socket.cpp index 2bdd2c9619..726baf9ae8 100644 --- a/Kernel/Net/IPv4Socket.cpp +++ b/Kernel/Net/IPv4Socket.cpp @@ -38,7 +38,7 @@ MutexProtected& IPv4Socket::all_sockets() ErrorOr> IPv4Socket::try_create_receive_buffer() { - return DoubleBuffer::try_create("IPv4Socket: Receive buffer"sv, 256 * KiB); + return DoubleBuffer::try_create("IPv4Socket: Receive buffer"sv, receive_buffer_size); } ErrorOr> IPv4Socket::create(int type, int protocol) diff --git a/Kernel/Net/IPv4Socket.h b/Kernel/Net/IPv4Socket.h index 48d5e3301e..c6c2bf0459 100644 --- a/Kernel/Net/IPv4Socket.h +++ b/Kernel/Net/IPv4Socket.h @@ -68,6 +68,8 @@ public: BufferMode buffer_mode() const { return m_buffer_mode; } protected: + static constexpr size_t receive_buffer_size = 256 * KiB; + IPv4Socket(int type, int protocol, NonnullOwnPtr receive_buffer, OwnPtr optional_scratch_buffer); virtual StringView class_name() const override { return "IPv4Socket"sv; } diff --git a/Kernel/Net/NetworkTask.cpp b/Kernel/Net/NetworkTask.cpp index 66b6498220..1d7d4c5221 100644 --- a/Kernel/Net/NetworkTask.cpp +++ b/Kernel/Net/NetworkTask.cpp @@ -430,6 +430,19 @@ void handle_tcp(IPv4Packet const& ipv4_packet, UnixDateTime const& packet_timest dbgln_if(TCP_DEBUG, "handle_tcp: got socket {}; state={}", socket->tuple().to_string(), TCPSocket::to_string(socket->state())); socket->receive_tcp_packet(tcp_packet, ipv4_packet.payload_size()); + Optional send_window_scale; + if (tcp_packet.has_syn()) { + tcp_packet.for_each_option([&send_window_scale](auto const& option) { + if (option.kind() != TCPOptionKind::WindowScale) + return; + if (option.length() != sizeof(TCPOptionWindowScale)) + return; + auto scale = static_cast(option).value(); + if (scale > 14) + return; // Maximum allowed as per RFC7323 + send_window_scale = scale; + }); + } switch (socket->state()) { case TCPSocket::State::Closed: @@ -459,6 +472,8 @@ void handle_tcp(IPv4Packet const& ipv4_packet, UnixDateTime const& packet_timest client->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); [[maybe_unused]] auto rc2 = client->send_tcp_packet(TCPFlags::SYN | TCPFlags::ACK); client->set_state(TCPSocket::State::SynReceived); + if (send_window_scale.has_value()) + client->set_send_window_scale(*send_window_scale); return; } default: @@ -472,6 +487,8 @@ void handle_tcp(IPv4Packet const& ipv4_packet, UnixDateTime const& packet_timest socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); (void)socket->send_tcp_packet(TCPFlags::SYN | TCPFlags::ACK); socket->set_state(TCPSocket::State::SynReceived); + if (send_window_scale.has_value()) + socket->set_send_window_scale(*send_window_scale); return; case TCPFlags::ACK | TCPFlags::SYN: socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); @@ -479,6 +496,8 @@ void handle_tcp(IPv4Packet const& ipv4_packet, UnixDateTime const& packet_timest socket->set_state(TCPSocket::State::Established); socket->set_setup_state(Socket::SetupState::Completed); socket->set_connected(true); + if (send_window_scale.has_value()) + socket->set_send_window_scale(*send_window_scale); return; case TCPFlags::ACK | TCPFlags::FIN: socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); diff --git a/Kernel/Net/TCP.h b/Kernel/Net/TCP.h index c843b5dda0..646a779f24 100644 --- a/Kernel/Net/TCP.h +++ b/Kernel/Net/TCP.h @@ -21,21 +21,59 @@ struct TCPFlags { }; }; -class [[gnu::packed]] TCPOptionMSS { +enum class TCPOptionKind : u8 { + End = 0, + Nop = 1, + MSS = 2, + WindowScale = 3, + SACKPermitted = 4, + SACK = 5, + Timestamp = 6, +}; + +class [[gnu::packed]] TCPOption { +public: + TCPOptionKind kind() const { return m_kind; } + u8 length() const { return m_length; } + +protected: + TCPOption(TCPOptionKind kind, u8 length) + : m_kind(kind) + , m_length(length) {}; + +private: + TCPOptionKind m_kind { TCPOptionKind::End }; + u8 m_length { sizeof(TCPOption) }; +}; + +class [[gnu::packed]] TCPOptionMSS : public TCPOption { public: TCPOptionMSS(u16 value) - : m_value(value) + : TCPOption(TCPOptionKind::MSS, sizeof(TCPOptionMSS)) + , m_value(value) { } u16 value() const { return m_value; } private: - u8 m_option_kind { 0x02 }; - u8 m_option_length { sizeof(TCPOptionMSS) }; NetworkOrdered m_value; }; +class [[gnu::packed]] TCPOptionWindowScale : public TCPOption { +public: + TCPOptionWindowScale(u8 value) + : TCPOption(TCPOptionKind::WindowScale, sizeof(TCPOptionWindowScale)) + , m_value(value) + { + } + + u8 value() const { return m_value; } + +private: + NetworkOrdered m_value; +}; + static_assert(AssertSize()); class [[gnu::packed]] TCPPacket { @@ -80,6 +118,28 @@ public: void const* payload() const { return ((u8 const*)this) + header_size(); } void* payload() { return ((u8*)this) + header_size(); } + template + void for_each_option(Callback callback) const + { + auto const* next_option = (u8 const*)this + sizeof(TCPPacket); + auto const* options_end = payload(); + while (next_option < options_end) { + if ((size_t)options_end - (size_t)next_option < sizeof(TCPOption)) + return; // Not enough space left for another option + auto const* option = (TCPOption const*)next_option; + if (option->kind() == TCPOptionKind::End) + return; + if (option->kind() == TCPOptionKind::Nop) { + next_option += 1; + continue; + } + if (option->length() < sizeof(TCPOption)) + return; // minimal option length + callback(*option); + next_option += option->length(); + } + } + private: NetworkOrdered m_source_port; NetworkOrdered m_destination_port; diff --git a/Kernel/Net/TCPSocket.cpp b/Kernel/Net/TCPSocket.cpp index 1e5caa2599..ca20ecf82f 100644 --- a/Kernel/Net/TCPSocket.cpp +++ b/Kernel/Net/TCPSocket.cpp @@ -245,10 +245,11 @@ ErrorOr TCPSocket::send_tcp_packet(u16 flags, UserOrKernelBuffer const* pa auto ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset(); - bool const has_mss_option = flags == TCPFlags::SYN; - const size_t options_size = has_mss_option ? sizeof(TCPOptionMSS) : 0; - const size_t tcp_header_size = sizeof(TCPPacket) + options_size; - const size_t buffer_size = ipv4_payload_offset + tcp_header_size + payload_size; + bool const has_mss_option = flags & TCPFlags::SYN; + bool const has_window_scale_option = flags & TCPFlags::SYN; + size_t const options_size = (has_mss_option ? sizeof(TCPOptionMSS) : 0) + (has_window_scale_option ? sizeof(TCPOptionWindowScale) : 0); + size_t const tcp_header_size = sizeof(TCPPacket) + align_up_to(options_size, 4); + size_t const buffer_size = ipv4_payload_offset + tcp_header_size + payload_size; auto packet = routing_decision.adapter->acquire_packet_buffer(buffer_size); if (!packet) return set_so_error(ENOMEM); @@ -260,7 +261,10 @@ ErrorOr TCPSocket::send_tcp_packet(u16 flags, UserOrKernelBuffer const* pa VERIFY(local_port()); tcp_packet.set_source_port(local_port()); tcp_packet.set_destination_port(peer_port()); - tcp_packet.set_window_size(min(available_space_in_receive_buffer(), NumericLimits::max())); + auto window_size = available_space_in_receive_buffer(); + if ((flags & TCPFlags::SYN) == 0 && m_window_scaling_supported) + window_size >>= receive_window_scale(); + tcp_packet.set_window_size(min(window_size, NumericLimits::max())); tcp_packet.set_sequence_number(m_sequence_number); tcp_packet.set_data_offset(tcp_header_size / sizeof(u32)); tcp_packet.set_flags(flags); @@ -284,12 +288,20 @@ ErrorOr TCPSocket::send_tcp_packet(u16 flags, UserOrKernelBuffer const* pa m_sequence_number += payload_size; } + u8* next_option = packet->buffer->data() + ipv4_payload_offset + sizeof(TCPPacket); if (has_mss_option) { u16 mss = routing_decision.adapter->mtu() - sizeof(IPv4Packet) - sizeof(TCPPacket); TCPOptionMSS mss_option { mss }; - VERIFY(packet->buffer->size() >= ipv4_payload_offset + sizeof(TCPPacket) + sizeof(mss_option)); - memcpy(packet->buffer->data() + ipv4_payload_offset + sizeof(TCPPacket), &mss_option, sizeof(mss_option)); + memcpy(next_option, &mss_option, sizeof(mss_option)); + next_option += sizeof(mss_option); } + if (has_window_scale_option) { + TCPOptionWindowScale window_scale_option { receive_window_scale() }; + memcpy(next_option, &window_scale_option, sizeof(window_scale_option)); + next_option += sizeof(window_scale_option); + } + if ((options_size % 4) != 0) + *next_option = to_underlying(TCPOptionKind::End); tcp_packet.set_checksum(compute_tcp_checksum(local_address(), peer_address(), tcp_packet, payload_size)); @@ -339,7 +351,7 @@ void TCPSocket::receive_tcp_packet(TCPPacket const& packet, u16 size) old_adapter->release_packet_buffer(*packet.buffer); TCPPacket& tcp_packet = *(TCPPacket*)(packet.buffer->buffer->data() + packet.ipv4_payload_offset); if (m_send_window_size != tcp_packet.window_size()) { - m_send_window_size = tcp_packet.window_size(); + m_send_window_size = tcp_packet.window_size() << m_send_window_scale; } auto payload_size = packet.buffer->buffer->data() + packet.buffer->buffer->size() - (u8*)tcp_packet.payload(); unacked_packets.size -= payload_size; @@ -367,7 +379,7 @@ void TCPSocket::receive_tcp_packet(TCPPacket const& packet, u16 size) bool TCPSocket::should_delay_next_ack() const { // FIXME: We don't know the MSS here so make a reasonable guess. - const size_t mss = 1500; + size_t const mss = 1500; // RFC 1122 says we should send an ACK for every two full-sized segments. if (m_ack_number >= m_last_ack_number_sent + 2 * mss) diff --git a/Kernel/Net/TCPSocket.h b/Kernel/Net/TCPSocket.h index 466e23da34..f2e82494a0 100644 --- a/Kernel/Net/TCPSocket.h +++ b/Kernel/Net/TCPSocket.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -135,6 +136,12 @@ public: u32 packets_out() const { return m_packets_out; } u32 bytes_out() const { return m_bytes_out; } + void set_send_window_scale(size_t scale) + { + m_window_scaling_supported = true; + m_send_window_scale = scale; + } + // FIXME: Make this configurable? static constexpr u32 maximum_duplicate_acks = 5; void set_duplicate_acks(u32 acks) { m_duplicate_acks = acks; } @@ -188,6 +195,14 @@ private: void enqueue_for_retransmit(); void dequeue_for_retransmit(); + static constexpr size_t receive_window_scale() + { + auto buffer_size_bit_length = AK::log2(receive_buffer_size) + 1; + if (buffer_size_bit_length < 16) + return 0; + return buffer_size_bit_length - 16; + } + LockWeakPtr m_originator; HashMap> m_pending_release_for_accept; Direction m_direction { Direction::Unspecified }; @@ -229,6 +244,8 @@ private: // Default to maximum window size. receive_tcp_packet() will update from the // peer's advertised window size. u32 m_send_window_size { 64 * KiB }; + bool m_window_scaling_supported { false }; + size_t m_send_window_scale { 0 }; bool m_no_delay { false };