mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 11:32:43 +00:00 
			
		
		
		
	Kernel: Coalesce TCP ACKs
Previously we'd send a TCP ACK for each TCP packet we received. This changes NetworkTask so that we send fewer TCP ACKs.
This commit is contained in:
		
							parent
							
								
									ffc6b714b0
								
							
						
					
					
						commit
						af59f64bc0
					
				
					 3 changed files with 81 additions and 8 deletions
				
			
		|  | @ -28,8 +28,11 @@ static void handle_ipv4(const EthernetFrameHeader&, size_t frame_size, const Tim | ||||||
| static void handle_icmp(const EthernetFrameHeader&, const IPv4Packet&, const Time& packet_timestamp); | static void handle_icmp(const EthernetFrameHeader&, const IPv4Packet&, const Time& packet_timestamp); | ||||||
| static void handle_udp(const IPv4Packet&, const Time& packet_timestamp); | static void handle_udp(const IPv4Packet&, const Time& packet_timestamp); | ||||||
| static void handle_tcp(const IPv4Packet&, const Time& packet_timestamp); | static void handle_tcp(const IPv4Packet&, const Time& packet_timestamp); | ||||||
|  | static void send_delayed_tcp_ack(RefPtr<TCPSocket> socket); | ||||||
|  | static void flush_delayed_tcp_acks(bool all); | ||||||
| 
 | 
 | ||||||
| static Thread* network_task = nullptr; | static Thread* network_task = nullptr; | ||||||
|  | static HashTable<RefPtr<TCPSocket>>* delayed_ack_sockets; | ||||||
| 
 | 
 | ||||||
| [[noreturn]] static void NetworkTask_main(void*); | [[noreturn]] static void NetworkTask_main(void*); | ||||||
| 
 | 
 | ||||||
|  | @ -47,6 +50,8 @@ bool NetworkTask::is_current() | ||||||
| 
 | 
 | ||||||
| void NetworkTask_main(void*) | void NetworkTask_main(void*) | ||||||
| { | { | ||||||
|  |     delayed_ack_sockets = new HashTable<RefPtr<TCPSocket>>; | ||||||
|  | 
 | ||||||
|     WaitQueue packet_wait_queue; |     WaitQueue packet_wait_queue; | ||||||
|     int pending_packets = 0; |     int pending_packets = 0; | ||||||
|     NetworkAdapter::for_each([&](auto& adapter) { |     NetworkAdapter::for_each([&](auto& adapter) { | ||||||
|  | @ -86,9 +91,13 @@ void NetworkTask_main(void*) | ||||||
|     for (;;) { |     for (;;) { | ||||||
|         size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp); |         size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp); | ||||||
|         if (!packet_size) { |         if (!packet_size) { | ||||||
|  |             // We might sleep for a while so we must flush all delayed TCP ACKs
 | ||||||
|  |             // including those which haven't expired yet.
 | ||||||
|  |             flush_delayed_tcp_acks(true); | ||||||
|             packet_wait_queue.wait_forever("NetworkTask"); |             packet_wait_queue.wait_forever("NetworkTask"); | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |         flush_delayed_tcp_acks(false); | ||||||
|         if (packet_size < sizeof(EthernetFrameHeader)) { |         if (packet_size < sizeof(EthernetFrameHeader)) { | ||||||
|             dbgln("NetworkTask: Packet is too small to be an Ethernet packet! ({})", packet_size); |             dbgln("NetworkTask: Packet is too small to be an Ethernet packet! ({})", packet_size); | ||||||
|             continue; |             continue; | ||||||
|  | @ -279,6 +288,38 @@ void handle_udp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) | ||||||
|         socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp); |         socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void send_delayed_tcp_ack(RefPtr<TCPSocket> socket) | ||||||
|  | { | ||||||
|  |     VERIFY(socket->lock().is_locked()); | ||||||
|  |     if (!socket->should_delay_next_ack()) { | ||||||
|  |         [[maybe_unused]] auto result = socket->send_ack(); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     delayed_ack_sockets->set(move(socket)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void flush_delayed_tcp_acks(bool all) | ||||||
|  | { | ||||||
|  |     Vector<RefPtr<TCPSocket>, 32> remaining_sockets; | ||||||
|  |     for (auto& socket : *delayed_ack_sockets) { | ||||||
|  |         Locker locker(socket->lock()); | ||||||
|  |         if (!all && socket->should_delay_next_ack()) { | ||||||
|  |             remaining_sockets.append(socket); | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         [[maybe_unused]] auto result = socket->send_ack(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (remaining_sockets.size() != delayed_ack_sockets->size()) { | ||||||
|  |         delayed_ack_sockets->clear(); | ||||||
|  |         if (remaining_sockets.size() > 0) | ||||||
|  |             dbgln("flush_delayed_tcp_acks: {} sockets remaining", remaining_sockets.size()); | ||||||
|  |         for (auto&& socket : remaining_sockets) | ||||||
|  |             delayed_ack_sockets->set(move(socket)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) | void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) | ||||||
| { | { | ||||||
|     if (ipv4_packet.payload_size() < sizeof(TCPPacket)) { |     if (ipv4_packet.payload_size() < sizeof(TCPPacket)) { | ||||||
|  | @ -393,26 +434,26 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) | ||||||
|         switch (tcp_packet.flags()) { |         switch (tcp_packet.flags()) { | ||||||
|         case TCPFlags::SYN: |         case TCPFlags::SYN: | ||||||
|             socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); |             socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); | ||||||
|             unused_rc = socket->send_tcp_packet(TCPFlags::ACK); |             send_delayed_tcp_ack(socket); | ||||||
|             socket->set_state(TCPSocket::State::SynReceived); |             socket->set_state(TCPSocket::State::SynReceived); | ||||||
|             return; |             return; | ||||||
|         case TCPFlags::ACK | TCPFlags::SYN: |         case TCPFlags::ACK | TCPFlags::SYN: | ||||||
|             socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); |             socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); | ||||||
|             unused_rc = socket->send_tcp_packet(TCPFlags::ACK); |             send_delayed_tcp_ack(socket); | ||||||
|             socket->set_state(TCPSocket::State::Established); |             socket->set_state(TCPSocket::State::Established); | ||||||
|             socket->set_setup_state(Socket::SetupState::Completed); |             socket->set_setup_state(Socket::SetupState::Completed); | ||||||
|             socket->set_connected(true); |             socket->set_connected(true); | ||||||
|             return; |             return; | ||||||
|         case TCPFlags::ACK | TCPFlags::FIN: |         case TCPFlags::ACK | TCPFlags::FIN: | ||||||
|             socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); |             socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); | ||||||
|             unused_rc = socket->send_tcp_packet(TCPFlags::ACK); |             send_delayed_tcp_ack(socket); | ||||||
|             socket->set_state(TCPSocket::State::Closed); |             socket->set_state(TCPSocket::State::Closed); | ||||||
|             socket->set_error(TCPSocket::Error::FINDuringConnect); |             socket->set_error(TCPSocket::Error::FINDuringConnect); | ||||||
|             socket->set_setup_state(Socket::SetupState::Completed); |             socket->set_setup_state(Socket::SetupState::Completed); | ||||||
|             return; |             return; | ||||||
|         case TCPFlags::ACK | TCPFlags::RST: |         case TCPFlags::ACK | TCPFlags::RST: | ||||||
|             socket->set_ack_number(tcp_packet.sequence_number() + payload_size); |             socket->set_ack_number(tcp_packet.sequence_number() + payload_size); | ||||||
|             unused_rc = socket->send_tcp_packet(TCPFlags::ACK); |             send_delayed_tcp_ack(socket); | ||||||
|             socket->set_state(TCPSocket::State::Closed); |             socket->set_state(TCPSocket::State::Closed); | ||||||
|             socket->set_error(TCPSocket::Error::RSTDuringConnect); |             socket->set_error(TCPSocket::Error::RSTDuringConnect); | ||||||
|             socket->set_setup_state(Socket::SetupState::Completed); |             socket->set_setup_state(Socket::SetupState::Completed); | ||||||
|  | @ -536,7 +577,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) | ||||||
|             if (socket->duplicate_acks() < TCPSocket::maximum_duplicate_acks) { |             if (socket->duplicate_acks() < TCPSocket::maximum_duplicate_acks) { | ||||||
|                 dbgln_if(TCP_DEBUG, "Sending ACK with same ack number to trigger fast retransmission"); |                 dbgln_if(TCP_DEBUG, "Sending ACK with same ack number to trigger fast retransmission"); | ||||||
|                 socket->set_duplicate_acks(socket->duplicate_acks() + 1); |                 socket->set_duplicate_acks(socket->duplicate_acks() + 1); | ||||||
|                 unused_rc = socket->send_tcp_packet(TCPFlags::ACK); |                 [[maybe_unused]] auto result = socket->send_ack(true); | ||||||
|             } |             } | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  | @ -548,7 +589,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) | ||||||
|                 socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp); |                 socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp); | ||||||
| 
 | 
 | ||||||
|             socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); |             socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); | ||||||
|             unused_rc = socket->send_tcp_packet(TCPFlags::ACK); |             send_delayed_tcp_ack(socket); | ||||||
|             socket->set_state(TCPSocket::State::CloseWait); |             socket->set_state(TCPSocket::State::CloseWait); | ||||||
|             socket->set_connected(false); |             socket->set_connected(false); | ||||||
|             return; |             return; | ||||||
|  | @ -559,7 +600,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) | ||||||
|                 socket->set_ack_number(tcp_packet.sequence_number() + payload_size); |                 socket->set_ack_number(tcp_packet.sequence_number() + payload_size); | ||||||
|                 dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}", |                 dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}", | ||||||
|                     tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number()); |                     tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number()); | ||||||
|                 unused_rc = socket->send_tcp_packet(TCPFlags::ACK); |                 send_delayed_tcp_ack(socket); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -167,6 +167,13 @@ KResultOr<size_t> TCPSocket::protocol_send(const UserOrKernelBuffer& data, size_ | ||||||
|     return data_length; |     return data_length; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | KResult TCPSocket::send_ack(bool allow_duplicate) | ||||||
|  | { | ||||||
|  |     if (!allow_duplicate && m_last_ack_number_sent == m_ack_number) | ||||||
|  |         return KSuccess; | ||||||
|  |     return send_tcp_packet(TCPFlags::ACK); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, size_t payload_size) | KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, size_t payload_size) | ||||||
| { | { | ||||||
|     const bool has_mss_option = flags == TCPFlags::SYN; |     const bool has_mss_option = flags == TCPFlags::SYN; | ||||||
|  | @ -183,8 +190,11 @@ KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, | ||||||
|     tcp_packet.set_data_offset(header_size / sizeof(u32)); |     tcp_packet.set_data_offset(header_size / sizeof(u32)); | ||||||
|     tcp_packet.set_flags(flags); |     tcp_packet.set_flags(flags); | ||||||
| 
 | 
 | ||||||
|     if (flags & TCPFlags::ACK) |     if (flags & TCPFlags::ACK) { | ||||||
|  |         m_last_ack_number_sent = m_ack_number; | ||||||
|  |         m_last_ack_sent_time = kgettimeofday(); | ||||||
|         tcp_packet.set_ack_number(m_ack_number); |         tcp_packet.set_ack_number(m_ack_number); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     if (payload && !payload->read(tcp_packet.payload(), payload_size)) |     if (payload && !payload->read(tcp_packet.payload(), payload_size)) | ||||||
|         return EFAULT; |         return EFAULT; | ||||||
|  | @ -308,6 +318,22 @@ void TCPSocket::receive_tcp_packet(const TCPPacket& packet, u16 size) | ||||||
|     m_bytes_in += packet.header_size() + size; |     m_bytes_in += packet.header_size() + size; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | bool TCPSocket::should_delay_next_ack() const | ||||||
|  | { | ||||||
|  |     // FIXME: We don't know the MSS here so make a reasonable guess.
 | ||||||
|  |     const size_t mss = 1500; | ||||||
|  | 
 | ||||||
|  |     // RFC 1122 says we should send an ACK for every two full-sized segments.
 | ||||||
|  |     if (m_ack_number >= m_last_ack_number_sent + 2 * mss) | ||||||
|  |         return false; | ||||||
|  | 
 | ||||||
|  |     // RFC 1122 says we should not delay ACKs for more than 500 milliseconds.
 | ||||||
|  |     if (kgettimeofday() >= m_last_ack_sent_time + Time::from_milliseconds(500)) | ||||||
|  |         return false; | ||||||
|  | 
 | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| NetworkOrdered<u16> TCPSocket::compute_tcp_checksum(const IPv4Address& source, const IPv4Address& destination, const TCPPacket& packet, u16 payload_size) | NetworkOrdered<u16> TCPSocket::compute_tcp_checksum(const IPv4Address& source, const IPv4Address& destination, const TCPPacket& packet, u16 payload_size) | ||||||
| { | { | ||||||
|     struct [[gnu::packed]] PseudoHeader { |     struct [[gnu::packed]] PseudoHeader { | ||||||
|  |  | ||||||
|  | @ -133,10 +133,13 @@ public: | ||||||
|     void set_duplicate_acks(u32 acks) { m_duplicate_acks = acks; } |     void set_duplicate_acks(u32 acks) { m_duplicate_acks = acks; } | ||||||
|     u32 duplicate_acks() const { return m_duplicate_acks; } |     u32 duplicate_acks() const { return m_duplicate_acks; } | ||||||
| 
 | 
 | ||||||
|  |     KResult send_ack(bool allow_duplicate = false); | ||||||
|     KResult send_tcp_packet(u16 flags, const UserOrKernelBuffer* = nullptr, size_t = 0); |     KResult send_tcp_packet(u16 flags, const UserOrKernelBuffer* = nullptr, size_t = 0); | ||||||
|     void send_outgoing_packets(RoutingDecision&); |     void send_outgoing_packets(RoutingDecision&); | ||||||
|     void receive_tcp_packet(const TCPPacket&, u16 size); |     void receive_tcp_packet(const TCPPacket&, u16 size); | ||||||
| 
 | 
 | ||||||
|  |     bool should_delay_next_ack() const; | ||||||
|  | 
 | ||||||
|     static Lockable<HashMap<IPv4SocketTuple, TCPSocket*>>& sockets_by_tuple(); |     static Lockable<HashMap<IPv4SocketTuple, TCPSocket*>>& sockets_by_tuple(); | ||||||
|     static RefPtr<TCPSocket> from_tuple(const IPv4SocketTuple& tuple); |     static RefPtr<TCPSocket> from_tuple(const IPv4SocketTuple& tuple); | ||||||
|     static RefPtr<TCPSocket> from_endpoints(const IPv4Address& local_address, u16 local_port, const IPv4Address& peer_address, u16 peer_port); |     static RefPtr<TCPSocket> from_endpoints(const IPv4Address& local_address, u16 local_port, const IPv4Address& peer_address, u16 peer_port); | ||||||
|  | @ -194,6 +197,9 @@ private: | ||||||
|     SinglyLinkedList<OutgoingPacket> m_not_acked; |     SinglyLinkedList<OutgoingPacket> m_not_acked; | ||||||
| 
 | 
 | ||||||
|     u32 m_duplicate_acks { 0 }; |     u32 m_duplicate_acks { 0 }; | ||||||
|  | 
 | ||||||
|  |     u32 m_last_ack_number_sent { 0 }; | ||||||
|  |     Time m_last_ack_sent_time; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Gunnar Beutner
						Gunnar Beutner