From 117d8db2a2167d97ace675d9a5821a91a6955899 Mon Sep 17 00:00:00 2001 From: Conrad Pankoff Date: Sun, 8 Sep 2019 17:38:08 +1000 Subject: [PATCH] Kernel: Implement outgoing TCP retransmission and better ACK handling This approach is a bit naiive - whenever we send a packet out, we check to see if there are any other packets we should try to send. This works well enough for a busy connection but not very well for a quiet one. Ideally we would check for not-acked packets on some kind of timer, and use the length of this not-acked list as feedback to throttle the writes coming from userspace. --- Kernel/Net/NetworkTask.cpp | 7 +-- Kernel/Net/TCPSocket.cpp | 97 +++++++++++++++++++++++++++++--------- Kernel/Net/TCPSocket.h | 14 +++++- 3 files changed, 90 insertions(+), 28 deletions(-) diff --git a/Kernel/Net/NetworkTask.cpp b/Kernel/Net/NetworkTask.cpp index 68b1751bc2..55a6b98a3d 100644 --- a/Kernel/Net/NetworkTask.cpp +++ b/Kernel/Net/NetworkTask.cpp @@ -374,12 +374,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet) kprintf("handle_tcp: got socket; state=%s\n", socket->tuple().to_string().characters(), TCPSocket::to_string(socket->state())); #endif - if (tcp_packet.ack_number() != socket->sequence_number()) { - kprintf("handle_tcp: ack/seq mismatch: got %u, wanted %u\n", tcp_packet.ack_number(), socket->sequence_number()); - return; - } - - socket->record_incoming_data(ipv4_packet.payload_size()); + socket->receive_tcp_packet(tcp_packet, ipv4_packet.payload_size()); switch (socket->state()) { case TCPSocket::State::Closed: diff --git a/Kernel/Net/TCPSocket.cpp b/Kernel/Net/TCPSocket.cpp index eb08ac1068..5c1ad33aa9 100644 --- a/Kernel/Net/TCPSocket.cpp +++ b/Kernel/Net/TCPSocket.cpp @@ -138,9 +138,6 @@ int TCPSocket::protocol_send(const void* data, int data_length) void TCPSocket::send_tcp_packet(u16 flags, const void* payload, int payload_size) { - auto routing_decision = route_to(peer_address(), local_address()); - ASSERT(!routing_decision.is_zero()); - auto buffer = ByteBuffer::create_zeroed(sizeof(TCPPacket) + payload_size); auto& tcp_packet = *(TCPPacket*)(buffer.pointer()); ASSERT(local_port()); @@ -162,29 +159,87 @@ void TCPSocket::send_tcp_packet(u16 flags, const void* payload, int payload_size memcpy(tcp_packet.payload(), payload, payload_size); tcp_packet.set_checksum(compute_tcp_checksum(local_address(), peer_address(), tcp_packet, payload_size)); -#ifdef TCP_SOCKET_DEBUG - kprintf("sending tcp packet from %s:%u to %s:%u with (%s%s%s%s) seq_no=%u, ack_no=%u\n", - local_address().to_string().characters(), - local_port(), - peer_address().to_string().characters(), - peer_port(), - tcp_packet.has_syn() ? "SYN " : "", - tcp_packet.has_ack() ? "ACK " : "", - tcp_packet.has_fin() ? "FIN " : "", - tcp_packet.has_rst() ? "RST " : "", - tcp_packet.sequence_number(), - tcp_packet.ack_number()); -#endif - routing_decision.adapter->send_ipv4(routing_decision.next_hop, peer_address(), IPv4Protocol::TCP, buffer.data(), buffer.size()); - m_packets_out++; - m_bytes_out += buffer.size(); + if (tcp_packet.has_syn() || payload_size > 0) { + m_not_acked.append({ m_sequence_number, buffer, 0, {} }); + send_outgoing_packets(); + } else { + auto routing_decision = route_to(peer_address(), local_address()); + ASSERT(!routing_decision.is_zero()); + + routing_decision.adapter->send_ipv4( + routing_decision.next_hop, peer_address(), IPv4Protocol::TCP, + buffer.data(), buffer.size()); + + m_packets_out++; + m_bytes_out += buffer.size(); + } } -void TCPSocket::record_incoming_data(int size) +void TCPSocket::send_outgoing_packets() { + auto routing_decision = route_to(peer_address(), local_address()); + ASSERT(!routing_decision.is_zero()); + + auto now = kgettimeofday(); + + for (auto& packet : m_not_acked) { + if (now.tv_sec <= packet.tx_time.tv_sec) + continue; + + packet.tx_time = now; + packet.tx_counter++; + +#ifdef TCP_SOCKET_DEBUG + auto& tcp_packet = *(TCPPacket*)(packet.buffer.pointer()); + kprintf("sending tcp packet from %s:%u to %s:%u with (%s%s%s%s) seq_no=%u, ack_no=%u, tx_counter=%u\n", + local_address().to_string().characters(), + local_port(), + peer_address().to_string().characters(), + peer_port(), + tcp_packet.has_syn() ? "SYN " : "", + tcp_packet.has_ack() ? "ACK " : "", + tcp_packet.has_fin() ? "FIN " : "", + tcp_packet.has_rst() ? "RST " : "", + tcp_packet.sequence_number(), + tcp_packet.ack_number(), + packet.tx_counter); +#endif + routing_decision.adapter->send_ipv4( + routing_decision.next_hop, peer_address(), IPv4Protocol::TCP, + packet.buffer.data(), packet.buffer.size()); + + m_packets_out++; + m_bytes_out += packet.buffer.size(); + } +} + +void TCPSocket::receive_tcp_packet(const TCPPacket& packet, u16 size) +{ + if (packet.has_ack()) { + u32 ack_number = packet.ack_number(); + + dbg() << "TCPSocket: receive_tcp_packet: " << ack_number; + + int removed = 0; + while (!m_not_acked.is_empty()) { + auto& packet = m_not_acked.first(); + + dbg() << "TCPSocket: iterate: " << packet.ack_number; + + if (packet.ack_number <= ack_number) { + m_not_acked.take_first(); + removed++; + } else { + break; + } + } + + dbg() << "TCPSocket: receive_tcp_packet acknowledged " << removed << " packets"; + } + m_packets_in++; - m_bytes_in += size; + m_bytes_in += packet.header_size() + size; } NetworkOrdered TCPSocket::compute_tcp_checksum(const IPv4Address& source, const IPv4Address& destination, const TCPPacket& packet, u16 payload_size) diff --git a/Kernel/Net/TCPSocket.h b/Kernel/Net/TCPSocket.h index adef5d9802..1a1c1cbced 100644 --- a/Kernel/Net/TCPSocket.h +++ b/Kernel/Net/TCPSocket.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include #include @@ -119,7 +121,8 @@ public: u32 bytes_out() const { return m_bytes_out; } void send_tcp_packet(u16 flags, const void* = nullptr, int = 0); - void record_incoming_data(int); + void send_outgoing_packets(); + void receive_tcp_packet(const TCPPacket&, u16 size); static Lockable>& sockets_by_tuple(); static RefPtr from_tuple(const IPv4SocketTuple& tuple); @@ -159,4 +162,13 @@ private: u32 m_bytes_in { 0 }; u32 m_packets_out { 0 }; u32 m_bytes_out { 0 }; + + struct OutgoingPacket { + u32 ack_number; + ByteBuffer buffer; + int tx_counter { 0 }; + timeval tx_time; + }; + + SinglyLinkedList m_not_acked; };