Serenity Operating System
1/*
2 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <Kernel/Debug.h>
8#include <Kernel/Locking/Mutex.h>
9#include <Kernel/Locking/MutexProtected.h>
10#include <Kernel/Net/ARP.h>
11#include <Kernel/Net/EtherType.h>
12#include <Kernel/Net/EthernetFrameHeader.h>
13#include <Kernel/Net/ICMP.h>
14#include <Kernel/Net/IPv4.h>
15#include <Kernel/Net/IPv4Socket.h>
16#include <Kernel/Net/LoopbackAdapter.h>
17#include <Kernel/Net/NetworkTask.h>
18#include <Kernel/Net/NetworkingManagement.h>
19#include <Kernel/Net/Routing.h>
20#include <Kernel/Net/TCP.h>
21#include <Kernel/Net/TCPSocket.h>
22#include <Kernel/Net/UDP.h>
23#include <Kernel/Net/UDPSocket.h>
24#include <Kernel/Process.h>
25
26namespace Kernel {
27
28static void handle_arp(EthernetFrameHeader const&, size_t frame_size);
29static void handle_ipv4(EthernetFrameHeader const&, size_t frame_size, Time const& packet_timestamp);
30static void handle_icmp(EthernetFrameHeader const&, IPv4Packet const&, Time const& packet_timestamp);
31static void handle_udp(IPv4Packet const&, Time const& packet_timestamp);
32static void handle_tcp(IPv4Packet const&, Time const& packet_timestamp);
33static void send_delayed_tcp_ack(TCPSocket& socket);
34static void send_tcp_rst(IPv4Packet const& ipv4_packet, TCPPacket const& tcp_packet, LockRefPtr<NetworkAdapter> adapter);
35static void flush_delayed_tcp_acks();
36static void retransmit_tcp_packets();
37
38static Thread* network_task = nullptr;
39static HashTable<NonnullRefPtr<TCPSocket>>* delayed_ack_sockets;
40
41[[noreturn]] static void NetworkTask_main(void*);
42
43void NetworkTask::spawn()
44{
45 LockRefPtr<Thread> thread;
46 auto name = KString::try_create("Network Task"sv);
47 if (name.is_error())
48 TODO();
49 (void)Process::create_kernel_process(thread, name.release_value(), NetworkTask_main, nullptr);
50 network_task = thread;
51}
52
53bool NetworkTask::is_current()
54{
55 return Thread::current() == network_task;
56}
57
58void NetworkTask_main(void*)
59{
60 delayed_ack_sockets = new HashTable<NonnullRefPtr<TCPSocket>>;
61
62 WaitQueue packet_wait_queue;
63 int pending_packets = 0;
64 NetworkingManagement::the().for_each([&](auto& adapter) {
65 dmesgln("NetworkTask: {} network adapter found: hw={}", adapter.class_name(), adapter.mac_address().to_string());
66
67 if (adapter.class_name() == "LoopbackAdapter"sv) {
68 adapter.set_ipv4_address({ 127, 0, 0, 1 });
69 adapter.set_ipv4_netmask({ 255, 0, 0, 0 });
70 }
71
72 adapter.on_receive = [&]() {
73 pending_packets++;
74 packet_wait_queue.wake_all();
75 };
76 });
77
78 auto dequeue_packet = [&pending_packets](u8* buffer, size_t buffer_size, Time& packet_timestamp) -> size_t {
79 if (pending_packets == 0)
80 return 0;
81 size_t packet_size = 0;
82 NetworkingManagement::the().for_each([&](auto& adapter) {
83 if (packet_size || !adapter.has_queued_packets())
84 return;
85 packet_size = adapter.dequeue_packet(buffer, buffer_size, packet_timestamp);
86 pending_packets--;
87 dbgln_if(NETWORK_TASK_DEBUG, "NetworkTask: Dequeued packet from {} ({} bytes)", adapter.name(), packet_size);
88 });
89 return packet_size;
90 };
91
92 size_t buffer_size = 64 * KiB;
93 auto region_or_error = MM.allocate_kernel_region(buffer_size, "Kernel Packet Buffer"sv, Memory::Region::Access::ReadWrite);
94 if (region_or_error.is_error())
95 TODO();
96 auto buffer_region = region_or_error.release_value();
97 auto buffer = (u8*)buffer_region->vaddr().get();
98 Time packet_timestamp;
99
100 for (;;) {
101 flush_delayed_tcp_acks();
102 retransmit_tcp_packets();
103 size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp);
104 if (!packet_size) {
105 auto timeout_time = Time::from_milliseconds(500);
106 auto timeout = Thread::BlockTimeout { false, &timeout_time };
107 [[maybe_unused]] auto result = packet_wait_queue.wait_on(timeout, "NetworkTask"sv);
108 continue;
109 }
110 if (packet_size < sizeof(EthernetFrameHeader)) {
111 dbgln("NetworkTask: Packet is too small to be an Ethernet packet! ({})", packet_size);
112 continue;
113 }
114 auto& eth = *(EthernetFrameHeader const*)buffer;
115 dbgln_if(ETHERNET_DEBUG, "NetworkTask: From {} to {}, ether_type={:#04x}, packet_size={}", eth.source().to_string(), eth.destination().to_string(), eth.ether_type(), packet_size);
116
117 switch (eth.ether_type()) {
118 case EtherType::ARP:
119 handle_arp(eth, packet_size);
120 break;
121 case EtherType::IPv4:
122 handle_ipv4(eth, packet_size, packet_timestamp);
123 break;
124 case EtherType::IPv6:
125 // ignore
126 break;
127 default:
128 dbgln_if(ETHERNET_DEBUG, "NetworkTask: Unknown ethernet type {:#04x}", eth.ether_type());
129 }
130 }
131}
132
133void handle_arp(EthernetFrameHeader const& eth, size_t frame_size)
134{
135 constexpr size_t minimum_arp_frame_size = sizeof(EthernetFrameHeader) + sizeof(ARPPacket);
136 if (frame_size < minimum_arp_frame_size) {
137 dbgln("handle_arp: Frame too small ({}, need {})", frame_size, minimum_arp_frame_size);
138 return;
139 }
140 auto& packet = *static_cast<ARPPacket const*>(eth.payload());
141 if (packet.hardware_type() != 1 || packet.hardware_address_length() != sizeof(MACAddress)) {
142 dbgln("handle_arp: Hardware type not ethernet ({:#04x}, len={})", packet.hardware_type(), packet.hardware_address_length());
143 return;
144 }
145 if (packet.protocol_type() != EtherType::IPv4 || packet.protocol_address_length() != sizeof(IPv4Address)) {
146 dbgln("handle_arp: Protocol type not IPv4 ({:#04x}, len={})", packet.protocol_type(), packet.protocol_address_length());
147 return;
148 }
149
150 dbgln_if(ARP_DEBUG, "handle_arp: operation={:#04x}, sender={}/{}, target={}/{}",
151 packet.operation(),
152 packet.sender_hardware_address().to_string(),
153 packet.sender_protocol_address().to_string(),
154 packet.target_hardware_address().to_string(),
155 packet.target_protocol_address().to_string());
156
157 if (!packet.sender_hardware_address().is_zero() && !packet.sender_protocol_address().is_zero()) {
158 // Someone has this IPv4 address. I guess we can try to remember that.
159 // FIXME: Protect against ARP spamming.
160 update_arp_table(packet.sender_protocol_address(), packet.sender_hardware_address(), UpdateTable::Set);
161 }
162
163 if (packet.operation() == ARPOperation::Request) {
164 // Who has this IP address?
165 if (auto adapter = NetworkingManagement::the().from_ipv4_address(packet.target_protocol_address())) {
166 // We do!
167 dbgln("handle_arp: Responding to ARP request for my IPv4 address ({})", adapter->ipv4_address());
168 ARPPacket response;
169 response.set_operation(ARPOperation::Response);
170 response.set_target_hardware_address(packet.sender_hardware_address());
171 response.set_target_protocol_address(packet.sender_protocol_address());
172 response.set_sender_hardware_address(adapter->mac_address());
173 response.set_sender_protocol_address(adapter->ipv4_address());
174
175 adapter->send(packet.sender_hardware_address(), response);
176 }
177 return;
178 }
179}
180
181void handle_ipv4(EthernetFrameHeader const& eth, size_t frame_size, Time const& packet_timestamp)
182{
183 constexpr size_t minimum_ipv4_frame_size = sizeof(EthernetFrameHeader) + sizeof(IPv4Packet);
184 if (frame_size < minimum_ipv4_frame_size) {
185 dbgln("handle_ipv4: Frame too small ({}, need {})", frame_size, minimum_ipv4_frame_size);
186 return;
187 }
188 auto& packet = *static_cast<IPv4Packet const*>(eth.payload());
189
190 if (packet.length() < sizeof(IPv4Packet)) {
191 dbgln("handle_ipv4: IPv4 packet too short ({}, need {})", packet.length(), sizeof(IPv4Packet));
192 return;
193 }
194
195 size_t actual_ipv4_packet_length = frame_size - sizeof(EthernetFrameHeader);
196 if (packet.length() > actual_ipv4_packet_length) {
197 dbgln("handle_ipv4: IPv4 packet claims to be longer than it is ({}, actually {})", packet.length(), actual_ipv4_packet_length);
198 return;
199 }
200
201 dbgln_if(IPV4_DEBUG, "handle_ipv4: source={}, destination={}", packet.source(), packet.destination());
202
203 NetworkingManagement::the().for_each([&](auto& adapter) {
204 if (adapter.ipv4_address().is_zero() || !adapter.link_up())
205 return;
206
207 auto my_net = adapter.ipv4_address().to_u32() & adapter.ipv4_netmask().to_u32();
208 auto their_net = packet.source().to_u32() & adapter.ipv4_netmask().to_u32();
209 if (my_net == their_net)
210 update_arp_table(packet.source(), eth.source(), UpdateTable::Set);
211 });
212
213 switch ((IPv4Protocol)packet.protocol()) {
214 case IPv4Protocol::ICMP:
215 return handle_icmp(eth, packet, packet_timestamp);
216 case IPv4Protocol::UDP:
217 return handle_udp(packet, packet_timestamp);
218 case IPv4Protocol::TCP:
219 return handle_tcp(packet, packet_timestamp);
220 default:
221 dbgln_if(IPV4_DEBUG, "handle_ipv4: Unhandled protocol {:#02x}", packet.protocol());
222 break;
223 }
224}
225
226void handle_icmp(EthernetFrameHeader const& eth, IPv4Packet const& ipv4_packet, Time const& packet_timestamp)
227{
228 auto& icmp_header = *static_cast<ICMPHeader const*>(ipv4_packet.payload());
229 dbgln_if(ICMP_DEBUG, "handle_icmp: source={}, destination={}, type={:#02x}, code={:#02x}", ipv4_packet.source().to_string(), ipv4_packet.destination().to_string(), icmp_header.type(), icmp_header.code());
230
231 {
232 Vector<NonnullRefPtr<IPv4Socket>> icmp_sockets;
233 IPv4Socket::all_sockets().with_exclusive([&](auto& sockets) {
234 for (auto& socket : sockets) {
235 if (socket.protocol() == (unsigned)IPv4Protocol::ICMP)
236 icmp_sockets.append(socket);
237 }
238 });
239 for (auto& socket : icmp_sockets)
240 socket->did_receive(ipv4_packet.source(), 0, { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
241 }
242
243 auto adapter = NetworkingManagement::the().from_ipv4_address(ipv4_packet.destination());
244 if (!adapter)
245 return;
246
247 if (icmp_header.type() == ICMPType::EchoRequest) {
248 auto& request = reinterpret_cast<ICMPEchoPacket const&>(icmp_header);
249 dbgln("handle_icmp: EchoRequest from {}: id={}, seq={}", ipv4_packet.source(), (u16)request.identifier, (u16)request.sequence_number);
250 size_t icmp_packet_size = ipv4_packet.payload_size();
251 if (icmp_packet_size < sizeof(ICMPEchoPacket)) {
252 dbgln("handle_icmp: EchoRequest packet is too small, ignoring.");
253 return;
254 }
255 auto ipv4_payload_offset = adapter->ipv4_payload_offset();
256 auto packet = adapter->acquire_packet_buffer(ipv4_payload_offset + icmp_packet_size);
257 if (!packet) {
258 dbgln("Could not allocate packet buffer while sending ICMP packet");
259 return;
260 }
261 adapter->fill_in_ipv4_header(*packet, adapter->ipv4_address(), eth.source(), ipv4_packet.source(), IPv4Protocol::ICMP, icmp_packet_size, 0, 64);
262 memset(packet->buffer->data() + ipv4_payload_offset, 0, sizeof(ICMPEchoPacket));
263 auto& response = *(ICMPEchoPacket*)(packet->buffer->data() + ipv4_payload_offset);
264 response.header.set_type(ICMPType::EchoReply);
265 response.header.set_code(0);
266 response.identifier = request.identifier;
267 response.sequence_number = request.sequence_number;
268 if (size_t icmp_payload_size = icmp_packet_size - sizeof(ICMPEchoPacket))
269 memcpy(response.payload(), request.payload(), icmp_payload_size);
270 response.header.set_checksum(internet_checksum(&response, icmp_packet_size));
271 // FIXME: What is the right TTL value here? Is 64 ok? Should we use the same TTL as the echo request?
272 adapter->send_packet(packet->bytes());
273 adapter->release_packet_buffer(*packet);
274 }
275}
276
277void handle_udp(IPv4Packet const& ipv4_packet, Time const& packet_timestamp)
278{
279 if (ipv4_packet.payload_size() < sizeof(UDPPacket)) {
280 dbgln("handle_udp: Packet too small ({}, need {})", ipv4_packet.payload_size(), sizeof(UDPPacket));
281 return;
282 }
283
284 auto& udp_packet = *static_cast<UDPPacket const*>(ipv4_packet.payload());
285 dbgln_if(UDP_DEBUG, "handle_udp: source={}:{}, destination={}:{}, length={}",
286 ipv4_packet.source(), udp_packet.source_port(),
287 ipv4_packet.destination(), udp_packet.destination_port(),
288 udp_packet.length());
289
290 auto socket = UDPSocket::from_port(udp_packet.destination_port());
291 if (!socket) {
292 dbgln_if(UDP_DEBUG, "handle_udp: No local UDP socket for {}:{}", ipv4_packet.destination(), udp_packet.destination_port());
293 return;
294 }
295
296 VERIFY(socket->type() == SOCK_DGRAM);
297 VERIFY(socket->local_port() == udp_packet.destination_port());
298
299 auto& destination = ipv4_packet.destination();
300
301 if (destination == IPv4Address(255, 255, 255, 255) || NetworkingManagement::the().from_ipv4_address(destination) || socket->multicast_memberships().contains_slow(destination))
302 socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
303}
304
305void send_delayed_tcp_ack(TCPSocket& socket)
306{
307 VERIFY(socket.mutex().is_locked());
308 if (!socket.should_delay_next_ack()) {
309 [[maybe_unused]] auto result = socket.send_ack();
310 return;
311 }
312
313 delayed_ack_sockets->set(move(socket));
314}
315
316void flush_delayed_tcp_acks()
317{
318 Vector<NonnullRefPtr<TCPSocket>, 32> remaining_sockets;
319 for (auto& socket : *delayed_ack_sockets) {
320 MutexLocker locker(socket->mutex());
321 if (socket->should_delay_next_ack()) {
322 MUST(remaining_sockets.try_append(*socket));
323 continue;
324 }
325 [[maybe_unused]] auto result = socket->send_ack();
326 }
327
328 if (remaining_sockets.size() != delayed_ack_sockets->size()) {
329 delayed_ack_sockets->clear();
330 if (remaining_sockets.size() > 0)
331 dbgln("flush_delayed_tcp_acks: {} sockets remaining", remaining_sockets.size());
332 for (auto&& socket : remaining_sockets)
333 delayed_ack_sockets->set(move(socket));
334 }
335}
336
337void send_tcp_rst(IPv4Packet const& ipv4_packet, TCPPacket const& tcp_packet, LockRefPtr<NetworkAdapter> adapter)
338{
339 auto routing_decision = route_to(ipv4_packet.source(), ipv4_packet.destination(), adapter);
340 if (routing_decision.is_zero())
341 return;
342
343 auto ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset();
344
345 const size_t options_size = 0;
346 const size_t tcp_header_size = sizeof(TCPPacket) + options_size;
347 const size_t buffer_size = ipv4_payload_offset + tcp_header_size;
348
349 auto packet = routing_decision.adapter->acquire_packet_buffer(buffer_size);
350 if (!packet)
351 return;
352 routing_decision.adapter->fill_in_ipv4_header(*packet, ipv4_packet.destination(),
353 routing_decision.next_hop, ipv4_packet.source(), IPv4Protocol::TCP,
354 buffer_size - ipv4_payload_offset, 0, 64);
355
356 auto& rst_packet = *(TCPPacket*)(packet->buffer->data() + ipv4_payload_offset);
357 rst_packet = {};
358 rst_packet.set_source_port(tcp_packet.destination_port());
359 rst_packet.set_destination_port(tcp_packet.source_port());
360 rst_packet.set_window_size(0);
361 rst_packet.set_sequence_number(0);
362 rst_packet.set_ack_number(tcp_packet.sequence_number() + 1);
363 rst_packet.set_data_offset(tcp_header_size / sizeof(u32));
364 rst_packet.set_flags(TCPFlags::RST | TCPFlags::ACK);
365 rst_packet.set_checksum(TCPSocket::compute_tcp_checksum(ipv4_packet.source(), ipv4_packet.destination(), rst_packet, 0));
366
367 routing_decision.adapter->send_packet(packet->bytes());
368 routing_decision.adapter->release_packet_buffer(*packet);
369}
370
371void handle_tcp(IPv4Packet const& ipv4_packet, Time const& packet_timestamp)
372{
373 if (ipv4_packet.payload_size() < sizeof(TCPPacket)) {
374 dbgln("handle_tcp: IPv4 payload is too small to be a TCP packet ({}, need {})", ipv4_packet.payload_size(), sizeof(TCPPacket));
375 return;
376 }
377
378 auto& tcp_packet = *static_cast<TCPPacket const*>(ipv4_packet.payload());
379
380 size_t minimum_tcp_header_size = 5 * sizeof(u32);
381 size_t maximum_tcp_header_size = 15 * sizeof(u32);
382 if (tcp_packet.header_size() < minimum_tcp_header_size || tcp_packet.header_size() > maximum_tcp_header_size) {
383 dbgln("handle_tcp: TCP packet header has invalid size {}", tcp_packet.header_size());
384 }
385
386 if (ipv4_packet.payload_size() < tcp_packet.header_size()) {
387 dbgln("handle_tcp: IPv4 payload is smaller than TCP header claims ({}, supposedly {})", ipv4_packet.payload_size(), tcp_packet.header_size());
388 return;
389 }
390
391 size_t payload_size = ipv4_packet.payload_size() - tcp_packet.header_size();
392
393 dbgln_if(TCP_DEBUG, "handle_tcp: source={}:{}, destination={}:{}, seq_no={}, ack_no={}, flags={:#04x} ({}{}{}{}), window_size={}, payload_size={}",
394 ipv4_packet.source().to_string(),
395 tcp_packet.source_port(),
396 ipv4_packet.destination().to_string(),
397 tcp_packet.destination_port(),
398 tcp_packet.sequence_number(),
399 tcp_packet.ack_number(),
400 tcp_packet.flags(),
401 tcp_packet.has_syn() ? "SYN " : "",
402 tcp_packet.has_ack() ? "ACK " : "",
403 tcp_packet.has_fin() ? "FIN " : "",
404 tcp_packet.has_rst() ? "RST " : "",
405 tcp_packet.window_size(),
406 payload_size);
407
408 auto adapter = NetworkingManagement::the().from_ipv4_address(ipv4_packet.destination());
409 if (!adapter) {
410 dbgln("handle_tcp: this packet is not for me, it's for {}", ipv4_packet.destination());
411 return;
412 }
413
414 IPv4SocketTuple tuple(ipv4_packet.destination(), tcp_packet.destination_port(), ipv4_packet.source(), tcp_packet.source_port());
415
416 dbgln_if(TCP_DEBUG, "handle_tcp: looking for socket; tuple={}", tuple.to_string());
417
418 auto socket = TCPSocket::from_tuple(tuple);
419 if (!socket) {
420 if (!tcp_packet.has_rst()) {
421 dbgln("handle_tcp: No TCP socket for tuple {}. Sending RST.", tuple.to_string());
422 send_tcp_rst(ipv4_packet, tcp_packet, adapter);
423 }
424 return;
425 }
426
427 MutexLocker locker(socket->mutex());
428
429 VERIFY(socket->type() == SOCK_STREAM);
430 VERIFY(socket->local_port() == tcp_packet.destination_port());
431
432 dbgln_if(TCP_DEBUG, "handle_tcp: got socket {}; state={}", socket->tuple().to_string(), TCPSocket::to_string(socket->state()));
433
434 socket->receive_tcp_packet(tcp_packet, ipv4_packet.payload_size());
435
436 switch (socket->state()) {
437 case TCPSocket::State::Closed:
438 dbgln("handle_tcp: unexpected flags in Closed state ({:x})", tcp_packet.flags());
439 // TODO: we may want to send an RST here, maybe as a configurable option
440 return;
441 case TCPSocket::State::TimeWait:
442 dbgln("handle_tcp: unexpected flags in TimeWait state ({:x})", tcp_packet.flags());
443 (void)socket->send_tcp_packet(TCPFlags::RST);
444 socket->set_state(TCPSocket::State::Closed);
445 return;
446 case TCPSocket::State::Listen:
447 switch (tcp_packet.flags()) {
448 case TCPFlags::SYN: {
449 dbgln_if(TCP_DEBUG, "handle_tcp: incoming connection");
450 auto& local_address = ipv4_packet.destination();
451 auto& peer_address = ipv4_packet.source();
452 auto client_or_error = socket->try_create_client(local_address, tcp_packet.destination_port(), peer_address, tcp_packet.source_port());
453 if (client_or_error.is_error()) {
454 dmesgln("handle_tcp: couldn't create client socket: {}", client_or_error.error());
455 return;
456 }
457 auto client = client_or_error.release_value();
458 MutexLocker locker(client->mutex());
459 dbgln_if(TCP_DEBUG, "handle_tcp: created new client socket with tuple {}", client->tuple().to_string());
460 client->set_sequence_number(1000);
461 client->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
462 [[maybe_unused]] auto rc2 = client->send_tcp_packet(TCPFlags::SYN | TCPFlags::ACK);
463 client->set_state(TCPSocket::State::SynReceived);
464 return;
465 }
466 default:
467 dbgln("handle_tcp: unexpected flags in Listen state ({:x})", tcp_packet.flags());
468 // socket->send_tcp_packet(TCPFlags::RST);
469 return;
470 }
471 case TCPSocket::State::SynSent:
472 switch (tcp_packet.flags()) {
473 case TCPFlags::SYN:
474 socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
475 (void)socket->send_tcp_packet(TCPFlags::SYN | TCPFlags::ACK);
476 socket->set_state(TCPSocket::State::SynReceived);
477 return;
478 case TCPFlags::ACK | TCPFlags::SYN:
479 socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
480 (void)socket->send_ack(true);
481 socket->set_state(TCPSocket::State::Established);
482 socket->set_setup_state(Socket::SetupState::Completed);
483 socket->set_connected(true);
484 return;
485 case TCPFlags::ACK | TCPFlags::FIN:
486 socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
487 send_delayed_tcp_ack(*socket);
488 socket->set_state(TCPSocket::State::Closed);
489 socket->set_error(TCPSocket::Error::FINDuringConnect);
490 socket->set_setup_state(Socket::SetupState::Completed);
491 return;
492 case TCPFlags::ACK | TCPFlags::RST:
493 socket->set_state(TCPSocket::State::Closed);
494 socket->set_error(TCPSocket::Error::RSTDuringConnect);
495 socket->set_setup_state(Socket::SetupState::Completed);
496 return;
497 default:
498 dbgln("handle_tcp: unexpected flags in SynSent state ({:x})", tcp_packet.flags());
499 (void)socket->send_tcp_packet(TCPFlags::RST);
500 socket->set_state(TCPSocket::State::Closed);
501 socket->set_error(TCPSocket::Error::UnexpectedFlagsDuringConnect);
502 socket->set_setup_state(Socket::SetupState::Completed);
503 return;
504 }
505 case TCPSocket::State::SynReceived:
506 switch (tcp_packet.flags()) {
507 case TCPFlags::ACK:
508 socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
509
510 switch (socket->direction()) {
511 case TCPSocket::Direction::Incoming:
512 if (!socket->has_originator()) {
513 dbgln("handle_tcp: connection doesn't have an originating socket; maybe it went away?");
514 (void)socket->send_tcp_packet(TCPFlags::RST);
515 socket->set_state(TCPSocket::State::Closed);
516 return;
517 }
518
519 socket->set_state(TCPSocket::State::Established);
520 socket->set_setup_state(Socket::SetupState::Completed);
521 socket->release_to_originator();
522 return;
523 case TCPSocket::Direction::Outgoing:
524 socket->set_state(TCPSocket::State::Established);
525 socket->set_setup_state(Socket::SetupState::Completed);
526 socket->set_connected(true);
527 return;
528 default:
529 dbgln("handle_tcp: got ACK in SynReceived state but direction is invalid ({})", TCPSocket::to_string(socket->direction()));
530 (void)socket->send_tcp_packet(TCPFlags::RST);
531 socket->set_state(TCPSocket::State::Closed);
532 return;
533 }
534 VERIFY_NOT_REACHED();
535
536 case TCPFlags::SYN:
537 dbgln("handle_tcp: ignoring SYN for partially established connection");
538 return;
539 default:
540 dbgln("handle_tcp: unexpected flags in SynReceived state ({:x})", tcp_packet.flags());
541 (void)socket->send_tcp_packet(TCPFlags::RST);
542 socket->set_state(TCPSocket::State::Closed);
543 return;
544 }
545 case TCPSocket::State::CloseWait:
546 switch (tcp_packet.flags()) {
547 default:
548 dbgln("handle_tcp: unexpected flags in CloseWait state ({:x})", tcp_packet.flags());
549 (void)socket->send_tcp_packet(TCPFlags::RST);
550 socket->set_state(TCPSocket::State::Closed);
551 return;
552 }
553 case TCPSocket::State::LastAck:
554 switch (tcp_packet.flags()) {
555 case TCPFlags::ACK:
556 socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
557 socket->set_state(TCPSocket::State::Closed);
558 return;
559 default:
560 dbgln("handle_tcp: unexpected flags in LastAck state ({:x})", tcp_packet.flags());
561 (void)socket->send_tcp_packet(TCPFlags::RST);
562 socket->set_state(TCPSocket::State::Closed);
563 return;
564 }
565 case TCPSocket::State::FinWait1:
566 switch (tcp_packet.flags()) {
567 case TCPFlags::ACK:
568 socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
569 socket->set_state(TCPSocket::State::FinWait2);
570 return;
571 case TCPFlags::FIN:
572 socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
573 socket->set_state(TCPSocket::State::Closing);
574 (void)socket->send_ack(true);
575 return;
576 case TCPFlags::FIN | TCPFlags::ACK:
577 socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
578 socket->set_state(TCPSocket::State::TimeWait);
579 (void)socket->send_ack(true);
580 return;
581 default:
582 dbgln("handle_tcp: unexpected flags in FinWait1 state ({:x})", tcp_packet.flags());
583 (void)socket->send_tcp_packet(TCPFlags::RST);
584 socket->set_state(TCPSocket::State::Closed);
585 return;
586 }
587 case TCPSocket::State::FinWait2:
588 switch (tcp_packet.flags()) {
589 case TCPFlags::FIN:
590 socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
591 socket->set_state(TCPSocket::State::TimeWait);
592 (void)socket->send_ack(true);
593 return;
594 case TCPFlags::ACK | TCPFlags::RST:
595 // FIXME: Verify that this transition is legitimate.
596 socket->set_state(TCPSocket::State::Closed);
597 return;
598 default:
599 dbgln("handle_tcp: unexpected flags in FinWait2 state ({:x})", tcp_packet.flags());
600 (void)socket->send_tcp_packet(TCPFlags::RST);
601 socket->set_state(TCPSocket::State::Closed);
602 return;
603 }
604 case TCPSocket::State::Closing:
605 switch (tcp_packet.flags()) {
606 case TCPFlags::ACK:
607 socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
608 socket->set_state(TCPSocket::State::TimeWait);
609 return;
610 default:
611 dbgln("handle_tcp: unexpected flags in Closing state ({:x})", tcp_packet.flags());
612 (void)socket->send_tcp_packet(TCPFlags::RST);
613 socket->set_state(TCPSocket::State::Closed);
614 return;
615 }
616 case TCPSocket::State::Established:
617 if (tcp_packet.has_rst()) {
618 socket->set_state(TCPSocket::State::Closed);
619 return;
620 }
621
622 if (tcp_packet.sequence_number() != socket->ack_number()) {
623 dbgln_if(TCP_DEBUG, "Discarding out of order packet: seq {} vs. ack {}", tcp_packet.sequence_number(), socket->ack_number());
624 if (socket->duplicate_acks() < TCPSocket::maximum_duplicate_acks) {
625 dbgln_if(TCP_DEBUG, "Sending ACK with same ack number to trigger fast retransmission");
626 socket->set_duplicate_acks(socket->duplicate_acks() + 1);
627 [[maybe_unused]] auto result = socket->send_ack(true);
628 }
629 return;
630 }
631
632 socket->set_duplicate_acks(0);
633
634 if (tcp_packet.has_fin()) {
635 if (payload_size != 0)
636 socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
637
638 socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
639 send_delayed_tcp_ack(*socket);
640 socket->set_state(TCPSocket::State::CloseWait);
641 socket->set_connected(false);
642 return;
643 }
644
645 if (payload_size) {
646 if (socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp)) {
647 socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
648 dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}",
649 tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number());
650 send_delayed_tcp_ack(*socket);
651 }
652 }
653 }
654}
655
656void retransmit_tcp_packets()
657{
658 // We must keep the sockets alive until after we've unlocked the hash table
659 // in case retransmit_packets() realizes that it wants to close the socket.
660 Vector<NonnullRefPtr<TCPSocket>, 16> sockets;
661 TCPSocket::sockets_for_retransmit().for_each_shared([&](auto const& socket) {
662 // We ignore allocation failures above the first 16 guaranteed socket slots, as
663 // we will just retransmit their packets the next time around
664 (void)sockets.try_append(socket);
665 });
666
667 for (auto& socket : sockets) {
668 MutexLocker socket_locker(socket->mutex());
669 socket->retransmit_packets();
670 }
671}
672
673}