// Patch summary (TCP_NODELAY support for kernel TCP sockets):
//
// * Kernel/API/POSIX/netinet/tcp.h (new file): defines TCP_NODELAY (10) and
//   TCP_MAXSEG (11) inside an extern "C" guard. The LibC netinet/tcp.h hunk
//   removes its own copies of both macros and replaces them with one #include.
// * Kernel/UnixTypes.h: gains one additional #include.
// * TCPSocket::protocol_send(): the Nagle (RFC 896) check -- fail with EAGAIN
//   via set_so_error() when unacknowledged packets exist and data_length < mss
//   -- now runs only when m_no_delay is false. The old FIXME is removed.
// * TCPSocket::setsockopt(): levels other than IPPROTO_TCP are forwarded to
//   IPv4Socket::setsockopt(). With a MutexLocker on mutex() held, TCP_NODELAY
//   requires user_value_size >= sizeof(int), copies one int from userspace,
//   rejects anything other than 0 or 1 with EINVAL, and stores it in
//   m_no_delay. Any other option is logged with dbgln() and answered with
//   ENOPROTOOPT.
// * TCPSocket::getsockopt(): same level forwarding and locking. It first
//   copies the caller's buffer size from userspace; TCP_NODELAY returns EINVAL
//   if that size is smaller than an int, otherwise writes 1 or 0 to the value
//   buffer and writes sizeof(int) back as the size. Other options are logged
//   with dbgln() and answered with ENOPROTOOPT.
// * TCPSocket.h: declares both overrides and adds `bool m_no_delay { false }`,
//   so the Nagle check stays enabled by default.
//
// NOTE(review): TCP_MAXSEG is defined by the new header but neither switch
// handles it, so it currently takes the default ENOPROTOOPT path.
// NOTE(review): in this copy the #include targets and what look like template
// arguments (after ErrorOr, Userspace, NetworkOrdered, static_ptr_cast) are
// blank, and the patch is folded onto three lines -- presumably an extraction
// artifact; confirm against the original commit before applying.
diff --git a/Kernel/API/POSIX/netinet/tcp.h b/Kernel/API/POSIX/netinet/tcp.h new file mode 100644 index 0000000000..5bea9ee99f --- /dev/null +++ b/Kernel/API/POSIX/netinet/tcp.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2023, Romain Chardiny + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#define TCP_NODELAY 10 +#define TCP_MAXSEG 11 + +#ifdef __cplusplus +} +#endif diff --git a/Kernel/Net/TCPSocket.cpp b/Kernel/Net/TCPSocket.cpp index 7d5986539b..ccb0df0b34 100644 --- a/Kernel/Net/TCPSocket.cpp +++ b/Kernel/Net/TCPSocket.cpp @@ -211,16 +211,17 @@ ErrorOr TCPSocket::protocol_send(UserOrKernelBuffer const& data, size_t return set_so_error(EHOSTUNREACH); size_t mss = routing_decision.adapter->mtu() - sizeof(IPv4Packet) - sizeof(TCPPacket); - // RFC 896 (Nagle’s algorithm): https://www.ietf.org/rfc/rfc0896 - // "The solution is to inhibit the sending of new TCP segments when - // new outgoing data arrives from the user if any previously - // transmitted data on the connection remains unacknowledged. This - // inhibition is to be unconditional; no timers, tests for size of - // data received, or other conditions are required." - // FIXME: Make this configurable via TCP_NODELAY. - auto has_unacked_data = m_unacked_packets.with_shared([&](auto const& packets) { return packets.size > 0; }); - if (has_unacked_data && data_length < mss) - return set_so_error(EAGAIN); + if (!m_no_delay) { + // RFC 896 (Nagle’s algorithm): https://www.ietf.org/rfc/rfc0896 + // "The solution is to inhibit the sending of new TCP segments when + // new outgoing data arrives from the user if any previously + // transmitted data on the connection remains unacknowledged. This + // inhibition is to be unconditional; no timers, tests for size of + // data received, or other conditions are required." 
+ auto has_unacked_data = m_unacked_packets.with_shared([&](auto const& packets) { return packets.size > 0; }); + if (has_unacked_data && data_length < mss) + return set_so_error(EAGAIN); + } data_length = min(data_length, mss); TRY(send_tcp_packet(TCPFlags::PSH | TCPFlags::ACK, &data, data_length, &routing_decision)); @@ -427,6 +428,54 @@ NetworkOrdered TCPSocket::compute_tcp_checksum(IPv4Address const& source, I return ~(checksum & 0xffff); } +ErrorOr TCPSocket::setsockopt(int level, int option, Userspace user_value, socklen_t user_value_size) +{ + if (level != IPPROTO_TCP) + return IPv4Socket::setsockopt(level, option, user_value, user_value_size); + + MutexLocker locker(mutex()); + + switch (option) { + case TCP_NODELAY: + if (user_value_size < sizeof(int)) + return EINVAL; + int value; + TRY(copy_from_user(&value, static_ptr_cast(user_value))); + if (value != 0 && value != 1) + return EINVAL; + m_no_delay = value; + return {}; + default: + dbgln("setsockopt({}) at IPPROTO_TCP not implemented.", option); + return ENOPROTOOPT; + } +} + +ErrorOr TCPSocket::getsockopt(OpenFileDescription& description, int level, int option, Userspace value, Userspace value_size) +{ + if (level != IPPROTO_TCP) + return IPv4Socket::getsockopt(description, level, option, value, value_size); + + MutexLocker locker(mutex()); + + socklen_t size; + TRY(copy_from_user(&size, value_size.unsafe_userspace_ptr())); + + switch (option) { + case TCP_NODELAY: { + int nodelay = m_no_delay ? 
1 : 0; + if (size < sizeof(nodelay)) + return EINVAL; + TRY(copy_to_user(static_ptr_cast(value), &nodelay)); + size = sizeof(nodelay); + return copy_to_user(value_size, &size); + } + default: + dbgln("getsockopt({}) at IPPROTO_TCP not implemented.", option); + return ENOPROTOOPT; + } +} + ErrorOr TCPSocket::protocol_bind() { dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket::protocol_bind(), local_port() is {}", local_port()); diff --git a/Kernel/Net/TCPSocket.h b/Kernel/Net/TCPSocket.h index c6a3a90cc7..51e630c9fc 100644 --- a/Kernel/Net/TCPSocket.h +++ b/Kernel/Net/TCPSocket.h @@ -165,6 +165,9 @@ public: static NetworkOrdered compute_tcp_checksum(IPv4Address const& source, IPv4Address const& destination, TCPPacket const&, u16 payload_size); + virtual ErrorOr setsockopt(int level, int option, Userspace, socklen_t) override; + virtual ErrorOr getsockopt(OpenFileDescription&, int level, int option, Userspace, Userspace) override; + protected: void set_direction(Direction direction) { m_direction = direction; } @@ -227,6 +230,8 @@ private: // peer's advertised window size. u32 m_send_window_size { 64 * KiB }; + bool m_no_delay { false }; + IntrusiveListNode m_retransmit_list_node; public: diff --git a/Kernel/UnixTypes.h b/Kernel/UnixTypes.h index 05b33873a8..81e2694bf2 100644 --- a/Kernel/UnixTypes.h +++ b/Kernel/UnixTypes.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/Userland/Libraries/LibC/netinet/tcp.h b/Userland/Libraries/LibC/netinet/tcp.h index 219e5c8fa1..6e304da6da 100644 --- a/Userland/Libraries/LibC/netinet/tcp.h +++ b/Userland/Libraries/LibC/netinet/tcp.h @@ -6,5 +6,4 @@ #pragma once -#define TCP_NODELAY 10 -#define TCP_MAXSEG 11 +#include