mirror of
https://github.com/RGBCube/serenity
synced 2025-07-24 22:17:42 +00:00
Kernel: Coalesce TCP ACKs
Previously we'd send a TCP ACK for each TCP packet we received. This changes NetworkTask so that we send fewer TCP ACKs.
This commit is contained in:
parent
ffc6b714b0
commit
af59f64bc0
3 changed files with 81 additions and 8 deletions
|
@ -28,8 +28,11 @@ static void handle_ipv4(const EthernetFrameHeader&, size_t frame_size, const Tim
|
||||||
static void handle_icmp(const EthernetFrameHeader&, const IPv4Packet&, const Time& packet_timestamp);
|
static void handle_icmp(const EthernetFrameHeader&, const IPv4Packet&, const Time& packet_timestamp);
|
||||||
static void handle_udp(const IPv4Packet&, const Time& packet_timestamp);
|
static void handle_udp(const IPv4Packet&, const Time& packet_timestamp);
|
||||||
static void handle_tcp(const IPv4Packet&, const Time& packet_timestamp);
|
static void handle_tcp(const IPv4Packet&, const Time& packet_timestamp);
|
||||||
|
static void send_delayed_tcp_ack(RefPtr<TCPSocket> socket);
|
||||||
|
static void flush_delayed_tcp_acks(bool all);
|
||||||
|
|
||||||
static Thread* network_task = nullptr;
|
static Thread* network_task = nullptr;
|
||||||
|
static HashTable<RefPtr<TCPSocket>>* delayed_ack_sockets;
|
||||||
|
|
||||||
[[noreturn]] static void NetworkTask_main(void*);
|
[[noreturn]] static void NetworkTask_main(void*);
|
||||||
|
|
||||||
|
@ -47,6 +50,8 @@ bool NetworkTask::is_current()
|
||||||
|
|
||||||
void NetworkTask_main(void*)
|
void NetworkTask_main(void*)
|
||||||
{
|
{
|
||||||
|
delayed_ack_sockets = new HashTable<RefPtr<TCPSocket>>;
|
||||||
|
|
||||||
WaitQueue packet_wait_queue;
|
WaitQueue packet_wait_queue;
|
||||||
int pending_packets = 0;
|
int pending_packets = 0;
|
||||||
NetworkAdapter::for_each([&](auto& adapter) {
|
NetworkAdapter::for_each([&](auto& adapter) {
|
||||||
|
@ -86,9 +91,13 @@ void NetworkTask_main(void*)
|
||||||
for (;;) {
|
for (;;) {
|
||||||
size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp);
|
size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp);
|
||||||
if (!packet_size) {
|
if (!packet_size) {
|
||||||
|
// We might sleep for a while so we must flush all delayed TCP ACKs
|
||||||
|
// including those which haven't expired yet.
|
||||||
|
flush_delayed_tcp_acks(true);
|
||||||
packet_wait_queue.wait_forever("NetworkTask");
|
packet_wait_queue.wait_forever("NetworkTask");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
flush_delayed_tcp_acks(false);
|
||||||
if (packet_size < sizeof(EthernetFrameHeader)) {
|
if (packet_size < sizeof(EthernetFrameHeader)) {
|
||||||
dbgln("NetworkTask: Packet is too small to be an Ethernet packet! ({})", packet_size);
|
dbgln("NetworkTask: Packet is too small to be an Ethernet packet! ({})", packet_size);
|
||||||
continue;
|
continue;
|
||||||
|
@ -279,6 +288,38 @@ void handle_udp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp)
|
||||||
socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
|
socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void send_delayed_tcp_ack(RefPtr<TCPSocket> socket)
|
||||||
|
{
|
||||||
|
VERIFY(socket->lock().is_locked());
|
||||||
|
if (!socket->should_delay_next_ack()) {
|
||||||
|
[[maybe_unused]] auto result = socket->send_ack();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
delayed_ack_sockets->set(move(socket));
|
||||||
|
}
|
||||||
|
|
||||||
|
void flush_delayed_tcp_acks(bool all)
|
||||||
|
{
|
||||||
|
Vector<RefPtr<TCPSocket>, 32> remaining_sockets;
|
||||||
|
for (auto& socket : *delayed_ack_sockets) {
|
||||||
|
Locker locker(socket->lock());
|
||||||
|
if (!all && socket->should_delay_next_ack()) {
|
||||||
|
remaining_sockets.append(socket);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
[[maybe_unused]] auto result = socket->send_ack();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (remaining_sockets.size() != delayed_ack_sockets->size()) {
|
||||||
|
delayed_ack_sockets->clear();
|
||||||
|
if (remaining_sockets.size() > 0)
|
||||||
|
dbgln("flush_delayed_tcp_acks: {} sockets remaining", remaining_sockets.size());
|
||||||
|
for (auto&& socket : remaining_sockets)
|
||||||
|
delayed_ack_sockets->set(move(socket));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp)
|
void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp)
|
||||||
{
|
{
|
||||||
if (ipv4_packet.payload_size() < sizeof(TCPPacket)) {
|
if (ipv4_packet.payload_size() < sizeof(TCPPacket)) {
|
||||||
|
@ -393,26 +434,26 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp)
|
||||||
switch (tcp_packet.flags()) {
|
switch (tcp_packet.flags()) {
|
||||||
case TCPFlags::SYN:
|
case TCPFlags::SYN:
|
||||||
socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
|
socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
|
||||||
unused_rc = socket->send_tcp_packet(TCPFlags::ACK);
|
send_delayed_tcp_ack(socket);
|
||||||
socket->set_state(TCPSocket::State::SynReceived);
|
socket->set_state(TCPSocket::State::SynReceived);
|
||||||
return;
|
return;
|
||||||
case TCPFlags::ACK | TCPFlags::SYN:
|
case TCPFlags::ACK | TCPFlags::SYN:
|
||||||
socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
|
socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
|
||||||
unused_rc = socket->send_tcp_packet(TCPFlags::ACK);
|
send_delayed_tcp_ack(socket);
|
||||||
socket->set_state(TCPSocket::State::Established);
|
socket->set_state(TCPSocket::State::Established);
|
||||||
socket->set_setup_state(Socket::SetupState::Completed);
|
socket->set_setup_state(Socket::SetupState::Completed);
|
||||||
socket->set_connected(true);
|
socket->set_connected(true);
|
||||||
return;
|
return;
|
||||||
case TCPFlags::ACK | TCPFlags::FIN:
|
case TCPFlags::ACK | TCPFlags::FIN:
|
||||||
socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
|
socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
|
||||||
unused_rc = socket->send_tcp_packet(TCPFlags::ACK);
|
send_delayed_tcp_ack(socket);
|
||||||
socket->set_state(TCPSocket::State::Closed);
|
socket->set_state(TCPSocket::State::Closed);
|
||||||
socket->set_error(TCPSocket::Error::FINDuringConnect);
|
socket->set_error(TCPSocket::Error::FINDuringConnect);
|
||||||
socket->set_setup_state(Socket::SetupState::Completed);
|
socket->set_setup_state(Socket::SetupState::Completed);
|
||||||
return;
|
return;
|
||||||
case TCPFlags::ACK | TCPFlags::RST:
|
case TCPFlags::ACK | TCPFlags::RST:
|
||||||
socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
|
socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
|
||||||
unused_rc = socket->send_tcp_packet(TCPFlags::ACK);
|
send_delayed_tcp_ack(socket);
|
||||||
socket->set_state(TCPSocket::State::Closed);
|
socket->set_state(TCPSocket::State::Closed);
|
||||||
socket->set_error(TCPSocket::Error::RSTDuringConnect);
|
socket->set_error(TCPSocket::Error::RSTDuringConnect);
|
||||||
socket->set_setup_state(Socket::SetupState::Completed);
|
socket->set_setup_state(Socket::SetupState::Completed);
|
||||||
|
@ -536,7 +577,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp)
|
||||||
if (socket->duplicate_acks() < TCPSocket::maximum_duplicate_acks) {
|
if (socket->duplicate_acks() < TCPSocket::maximum_duplicate_acks) {
|
||||||
dbgln_if(TCP_DEBUG, "Sending ACK with same ack number to trigger fast retransmission");
|
dbgln_if(TCP_DEBUG, "Sending ACK with same ack number to trigger fast retransmission");
|
||||||
socket->set_duplicate_acks(socket->duplicate_acks() + 1);
|
socket->set_duplicate_acks(socket->duplicate_acks() + 1);
|
||||||
unused_rc = socket->send_tcp_packet(TCPFlags::ACK);
|
[[maybe_unused]] auto result = socket->send_ack(true);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -548,7 +589,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp)
|
||||||
socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
|
socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
|
||||||
|
|
||||||
socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
|
socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
|
||||||
unused_rc = socket->send_tcp_packet(TCPFlags::ACK);
|
send_delayed_tcp_ack(socket);
|
||||||
socket->set_state(TCPSocket::State::CloseWait);
|
socket->set_state(TCPSocket::State::CloseWait);
|
||||||
socket->set_connected(false);
|
socket->set_connected(false);
|
||||||
return;
|
return;
|
||||||
|
@ -559,7 +600,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp)
|
||||||
socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
|
socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
|
||||||
dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}",
|
dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}",
|
||||||
tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number());
|
tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number());
|
||||||
unused_rc = socket->send_tcp_packet(TCPFlags::ACK);
|
send_delayed_tcp_ack(socket);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,6 +167,13 @@ KResultOr<size_t> TCPSocket::protocol_send(const UserOrKernelBuffer& data, size_
|
||||||
return data_length;
|
return data_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
KResult TCPSocket::send_ack(bool allow_duplicate)
|
||||||
|
{
|
||||||
|
if (!allow_duplicate && m_last_ack_number_sent == m_ack_number)
|
||||||
|
return KSuccess;
|
||||||
|
return send_tcp_packet(TCPFlags::ACK);
|
||||||
|
}
|
||||||
|
|
||||||
KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, size_t payload_size)
|
KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, size_t payload_size)
|
||||||
{
|
{
|
||||||
const bool has_mss_option = flags == TCPFlags::SYN;
|
const bool has_mss_option = flags == TCPFlags::SYN;
|
||||||
|
@ -183,8 +190,11 @@ KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload,
|
||||||
tcp_packet.set_data_offset(header_size / sizeof(u32));
|
tcp_packet.set_data_offset(header_size / sizeof(u32));
|
||||||
tcp_packet.set_flags(flags);
|
tcp_packet.set_flags(flags);
|
||||||
|
|
||||||
if (flags & TCPFlags::ACK)
|
if (flags & TCPFlags::ACK) {
|
||||||
|
m_last_ack_number_sent = m_ack_number;
|
||||||
|
m_last_ack_sent_time = kgettimeofday();
|
||||||
tcp_packet.set_ack_number(m_ack_number);
|
tcp_packet.set_ack_number(m_ack_number);
|
||||||
|
}
|
||||||
|
|
||||||
if (payload && !payload->read(tcp_packet.payload(), payload_size))
|
if (payload && !payload->read(tcp_packet.payload(), payload_size))
|
||||||
return EFAULT;
|
return EFAULT;
|
||||||
|
@ -308,6 +318,22 @@ void TCPSocket::receive_tcp_packet(const TCPPacket& packet, u16 size)
|
||||||
m_bytes_in += packet.header_size() + size;
|
m_bytes_in += packet.header_size() + size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TCPSocket::should_delay_next_ack() const
|
||||||
|
{
|
||||||
|
// FIXME: We don't know the MSS here so make a reasonable guess.
|
||||||
|
const size_t mss = 1500;
|
||||||
|
|
||||||
|
// RFC 1122 says we should send an ACK for every two full-sized segments.
|
||||||
|
if (m_ack_number >= m_last_ack_number_sent + 2 * mss)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// RFC 1122 says we should not delay ACKs for more than 500 milliseconds.
|
||||||
|
if (kgettimeofday() >= m_last_ack_sent_time + Time::from_milliseconds(500))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
NetworkOrdered<u16> TCPSocket::compute_tcp_checksum(const IPv4Address& source, const IPv4Address& destination, const TCPPacket& packet, u16 payload_size)
|
NetworkOrdered<u16> TCPSocket::compute_tcp_checksum(const IPv4Address& source, const IPv4Address& destination, const TCPPacket& packet, u16 payload_size)
|
||||||
{
|
{
|
||||||
struct [[gnu::packed]] PseudoHeader {
|
struct [[gnu::packed]] PseudoHeader {
|
||||||
|
|
|
@ -133,10 +133,13 @@ public:
|
||||||
void set_duplicate_acks(u32 acks) { m_duplicate_acks = acks; }
|
void set_duplicate_acks(u32 acks) { m_duplicate_acks = acks; }
|
||||||
u32 duplicate_acks() const { return m_duplicate_acks; }
|
u32 duplicate_acks() const { return m_duplicate_acks; }
|
||||||
|
|
||||||
|
KResult send_ack(bool allow_duplicate = false);
|
||||||
KResult send_tcp_packet(u16 flags, const UserOrKernelBuffer* = nullptr, size_t = 0);
|
KResult send_tcp_packet(u16 flags, const UserOrKernelBuffer* = nullptr, size_t = 0);
|
||||||
void send_outgoing_packets(RoutingDecision&);
|
void send_outgoing_packets(RoutingDecision&);
|
||||||
void receive_tcp_packet(const TCPPacket&, u16 size);
|
void receive_tcp_packet(const TCPPacket&, u16 size);
|
||||||
|
|
||||||
|
bool should_delay_next_ack() const;
|
||||||
|
|
||||||
static Lockable<HashMap<IPv4SocketTuple, TCPSocket*>>& sockets_by_tuple();
|
static Lockable<HashMap<IPv4SocketTuple, TCPSocket*>>& sockets_by_tuple();
|
||||||
static RefPtr<TCPSocket> from_tuple(const IPv4SocketTuple& tuple);
|
static RefPtr<TCPSocket> from_tuple(const IPv4SocketTuple& tuple);
|
||||||
static RefPtr<TCPSocket> from_endpoints(const IPv4Address& local_address, u16 local_port, const IPv4Address& peer_address, u16 peer_port);
|
static RefPtr<TCPSocket> from_endpoints(const IPv4Address& local_address, u16 local_port, const IPv4Address& peer_address, u16 peer_port);
|
||||||
|
@ -194,6 +197,9 @@ private:
|
||||||
SinglyLinkedList<OutgoingPacket> m_not_acked;
|
SinglyLinkedList<OutgoingPacket> m_not_acked;
|
||||||
|
|
||||||
u32 m_duplicate_acks { 0 };
|
u32 m_duplicate_acks { 0 };
|
||||||
|
|
||||||
|
u32 m_last_ack_number_sent { 0 };
|
||||||
|
Time m_last_ack_sent_time;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue