// Serenity Operating System
// Source listing as viewed at "portability" (471 lines, 16 kB).
1/* 2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, this 9 * list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 */ 26 27#include <AK/Time.h> 28#include <Kernel/Devices/RandomDevice.h> 29#include <Kernel/FileSystem/FileDescription.h> 30#include <Kernel/Net/NetworkAdapter.h> 31#include <Kernel/Net/Routing.h> 32#include <Kernel/Net/TCP.h> 33#include <Kernel/Net/TCPSocket.h> 34#include <Kernel/Process.h> 35#include <Kernel/Random.h> 36 37//#define TCP_SOCKET_DEBUG 38 39namespace Kernel { 40 41void TCPSocket::for_each(Function<void(TCPSocket&)> callback) 42{ 43 LOCKER(sockets_by_tuple().lock()); 44 for (auto& it : sockets_by_tuple().resource()) 45 callback(*it.value); 46} 47 48void TCPSocket::set_state(State new_state) 49{ 50#ifdef TCP_SOCKET_DEBUG 51 kprintf("%s(%u) TCPSocket{%p} state moving from %s to %s\n", 52 Process::current->name().characters(), Process::current->pid(), this, 53 to_string(m_state), to_string(new_state)); 54#endif 55 56 m_state = new_state; 57 58 if (new_state == State::Established && m_direction == Direction::Outgoing) 59 m_role = Role::Connected; 60 61 if (new_state == State::Closed) { 62 LOCKER(closing_sockets().lock()); 63 closing_sockets().resource().remove(tuple()); 64 } 65} 66 67Lockable<HashMap<IPv4SocketTuple, RefPtr<TCPSocket>>>& TCPSocket::closing_sockets() 68{ 69 static Lockable<HashMap<IPv4SocketTuple, RefPtr<TCPSocket>>>* s_map; 70 if (!s_map) 71 s_map = new Lockable<HashMap<IPv4SocketTuple, RefPtr<TCPSocket>>>; 72 return *s_map; 73} 74 75Lockable<HashMap<IPv4SocketTuple, TCPSocket*>>& TCPSocket::sockets_by_tuple() 76{ 77 static Lockable<HashMap<IPv4SocketTuple, TCPSocket*>>* s_map; 78 if (!s_map) 79 s_map = new Lockable<HashMap<IPv4SocketTuple, TCPSocket*>>; 80 return *s_map; 81} 82 83RefPtr<TCPSocket> TCPSocket::from_tuple(const IPv4SocketTuple& tuple) 84{ 85 LOCKER(sockets_by_tuple().lock()); 86 87 auto exact_match = sockets_by_tuple().resource().get(tuple); 88 if (exact_match.has_value()) 89 return { *exact_match.value() }; 90 91 auto address_tuple = IPv4SocketTuple(tuple.local_address(), tuple.local_port(), IPv4Address(), 0); 92 auto 
address_match = sockets_by_tuple().resource().get(address_tuple); 93 if (address_match.has_value()) 94 return { *address_match.value() }; 95 96 auto wildcard_tuple = IPv4SocketTuple(IPv4Address(), tuple.local_port(), IPv4Address(), 0); 97 auto wildcard_match = sockets_by_tuple().resource().get(wildcard_tuple); 98 if (wildcard_match.has_value()) 99 return { *wildcard_match.value() }; 100 101 return {}; 102} 103 104RefPtr<TCPSocket> TCPSocket::from_endpoints(const IPv4Address& local_address, u16 local_port, const IPv4Address& peer_address, u16 peer_port) 105{ 106 return from_tuple(IPv4SocketTuple(local_address, local_port, peer_address, peer_port)); 107} 108 109RefPtr<TCPSocket> TCPSocket::create_client(const IPv4Address& new_local_address, u16 new_local_port, const IPv4Address& new_peer_address, u16 new_peer_port) 110{ 111 auto tuple = IPv4SocketTuple(new_local_address, new_local_port, new_peer_address, new_peer_port); 112 113 LOCKER(sockets_by_tuple().lock()); 114 if (sockets_by_tuple().resource().contains(tuple)) 115 return {}; 116 117 auto client = TCPSocket::create(protocol()); 118 119 client->set_setup_state(SetupState::InProgress); 120 client->set_local_address(new_local_address); 121 client->set_local_port(new_local_port); 122 client->set_peer_address(new_peer_address); 123 client->set_peer_port(new_peer_port); 124 client->set_direction(Direction::Incoming); 125 client->set_originator(*this); 126 127 m_pending_release_for_accept.set(tuple, client); 128 sockets_by_tuple().resource().set(tuple, client); 129 130 return from_tuple(tuple); 131} 132 133void TCPSocket::release_to_originator() 134{ 135 ASSERT(!!m_originator); 136 m_originator->release_for_accept(this); 137} 138 139void TCPSocket::release_for_accept(RefPtr<TCPSocket> socket) 140{ 141 ASSERT(m_pending_release_for_accept.contains(socket->tuple())); 142 m_pending_release_for_accept.remove(socket->tuple()); 143 queue_connection_from(*socket); 144} 145 146TCPSocket::TCPSocket(int protocol) 147 : 
IPv4Socket(SOCK_STREAM, protocol) 148{ 149} 150 151TCPSocket::~TCPSocket() 152{ 153 LOCKER(sockets_by_tuple().lock()); 154 sockets_by_tuple().resource().remove(tuple()); 155 156#ifdef TCP_SOCKET_DEBUG 157 dbg() << "~TCPSocket in state " << to_string(state()); 158#endif 159} 160 161NonnullRefPtr<TCPSocket> TCPSocket::create(int protocol) 162{ 163 return adopt(*new TCPSocket(protocol)); 164} 165 166int TCPSocket::protocol_receive(const KBuffer& packet_buffer, void* buffer, size_t buffer_size, int flags) 167{ 168 (void)flags; 169 auto& ipv4_packet = *(const IPv4Packet*)(packet_buffer.data()); 170 auto& tcp_packet = *static_cast<const TCPPacket*>(ipv4_packet.payload()); 171 size_t payload_size = packet_buffer.size() - sizeof(IPv4Packet) - tcp_packet.header_size(); 172#ifdef TCP_SOCKET_DEBUG 173 kprintf("payload_size %u, will it fit in %u?\n", payload_size, buffer_size); 174#endif 175 ASSERT(buffer_size >= payload_size); 176 memcpy(buffer, tcp_packet.payload(), payload_size); 177 return payload_size; 178} 179 180int TCPSocket::protocol_send(const void* data, size_t data_length) 181{ 182 send_tcp_packet(TCPFlags::PUSH | TCPFlags::ACK, data, data_length); 183 return data_length; 184} 185 186void TCPSocket::send_tcp_packet(u16 flags, const void* payload, size_t payload_size) 187{ 188 auto buffer = ByteBuffer::create_zeroed(sizeof(TCPPacket) + payload_size); 189 auto& tcp_packet = *(TCPPacket*)(buffer.data()); 190 ASSERT(local_port()); 191 tcp_packet.set_source_port(local_port()); 192 tcp_packet.set_destination_port(peer_port()); 193 tcp_packet.set_window_size(1024); 194 tcp_packet.set_sequence_number(m_sequence_number); 195 tcp_packet.set_data_offset(sizeof(TCPPacket) / sizeof(u32)); 196 tcp_packet.set_flags(flags); 197 198 if (flags & TCPFlags::ACK) 199 tcp_packet.set_ack_number(m_ack_number); 200 201 if (flags & TCPFlags::SYN) { 202 ++m_sequence_number; 203 } else { 204 m_sequence_number += payload_size; 205 } 206 207 memcpy(tcp_packet.payload(), payload, payload_size); 
208 tcp_packet.set_checksum(compute_tcp_checksum(local_address(), peer_address(), tcp_packet, payload_size)); 209 210 if (tcp_packet.has_syn() || payload_size > 0) { 211 LOCKER(m_not_acked_lock); 212 m_not_acked.append({ m_sequence_number, move(buffer) }); 213 send_outgoing_packets(); 214 return; 215 } 216 217 auto routing_decision = route_to(peer_address(), local_address()); 218 ASSERT(!routing_decision.is_zero()); 219 220 routing_decision.adapter->send_ipv4( 221 routing_decision.next_hop, peer_address(), IPv4Protocol::TCP, 222 buffer.data(), buffer.size(), ttl()); 223 224 m_packets_out++; 225 m_bytes_out += buffer.size(); 226} 227 228void TCPSocket::send_outgoing_packets() 229{ 230 auto routing_decision = route_to(peer_address(), local_address()); 231 ASSERT(!routing_decision.is_zero()); 232 233 auto now = kgettimeofday(); 234 235 LOCKER(m_not_acked_lock); 236 for (auto& packet : m_not_acked) { 237 timeval diff; 238 timeval_sub(packet.tx_time, now, diff); 239 if (diff.tv_sec == 0 && diff.tv_usec <= 500000) 240 continue; 241 packet.tx_time = now; 242 packet.tx_counter++; 243 244#ifdef TCP_SOCKET_DEBUG 245 auto& tcp_packet = *(TCPPacket*)(packet.buffer.data()); 246 kprintf("sending tcp packet from %s:%u to %s:%u with (%s%s%s%s) seq_no=%u, ack_no=%u, tx_counter=%u\n", 247 local_address().to_string().characters(), 248 local_port(), 249 peer_address().to_string().characters(), 250 peer_port(), 251 tcp_packet.has_syn() ? "SYN " : "", 252 tcp_packet.has_ack() ? "ACK " : "", 253 tcp_packet.has_fin() ? "FIN " : "", 254 tcp_packet.has_rst() ? 
"RST " : "", 255 tcp_packet.sequence_number(), 256 tcp_packet.ack_number(), 257 packet.tx_counter); 258#endif 259 routing_decision.adapter->send_ipv4( 260 routing_decision.next_hop, peer_address(), IPv4Protocol::TCP, 261 packet.buffer.data(), packet.buffer.size(), ttl()); 262 263 m_packets_out++; 264 m_bytes_out += packet.buffer.size(); 265 } 266} 267 268void TCPSocket::receive_tcp_packet(const TCPPacket& packet, u16 size) 269{ 270 if (packet.has_ack()) { 271 u32 ack_number = packet.ack_number(); 272 273#ifdef TCP_SOCKET_DEBUG 274 dbg() << "TCPSocket: receive_tcp_packet: " << ack_number; 275#endif 276 277 int removed = 0; 278 LOCKER(m_not_acked_lock); 279 while (!m_not_acked.is_empty()) { 280 auto& packet = m_not_acked.first(); 281 282#ifdef TCP_SOCKET_DEBUG 283 dbg() << "TCPSocket: iterate: " << packet.ack_number; 284#endif 285 286 if (packet.ack_number <= ack_number) { 287 m_not_acked.take_first(); 288 removed++; 289 } else { 290 break; 291 } 292 } 293 294#ifdef TCP_SOCKET_DEBUG 295 dbg() << "TCPSocket: receive_tcp_packet acknowledged " << removed << " packets"; 296#endif 297 } 298 299 m_packets_in++; 300 m_bytes_in += packet.header_size() + size; 301} 302 303NetworkOrdered<u16> TCPSocket::compute_tcp_checksum(const IPv4Address& source, const IPv4Address& destination, const TCPPacket& packet, u16 payload_size) 304{ 305 struct [[gnu::packed]] PseudoHeader 306 { 307 IPv4Address source; 308 IPv4Address destination; 309 u8 zero; 310 u8 protocol; 311 NetworkOrdered<u16> payload_size; 312 }; 313 314 PseudoHeader pseudo_header { source, destination, 0, (u8)IPv4Protocol::TCP, sizeof(TCPPacket) + payload_size }; 315 316 u32 checksum = 0; 317 auto* w = (const NetworkOrdered<u16>*)&pseudo_header; 318 for (size_t i = 0; i < sizeof(pseudo_header) / sizeof(u16); ++i) { 319 checksum += w[i]; 320 if (checksum > 0xffff) 321 checksum = (checksum >> 16) + (checksum & 0xffff); 322 } 323 w = (const NetworkOrdered<u16>*)&packet; 324 for (size_t i = 0; i < sizeof(packet) / sizeof(u16); 
++i) { 325 checksum += w[i]; 326 if (checksum > 0xffff) 327 checksum = (checksum >> 16) + (checksum & 0xffff); 328 } 329 ASSERT(packet.data_offset() * 4 == sizeof(TCPPacket)); 330 w = (const NetworkOrdered<u16>*)packet.payload(); 331 for (size_t i = 0; i < payload_size / sizeof(u16); ++i) { 332 checksum += w[i]; 333 if (checksum > 0xffff) 334 checksum = (checksum >> 16) + (checksum & 0xffff); 335 } 336 if (payload_size & 1) { 337 u16 expanded_byte = ((const u8*)packet.payload())[payload_size - 1] << 8; 338 checksum += expanded_byte; 339 if (checksum > 0xffff) 340 checksum = (checksum >> 16) + (checksum & 0xffff); 341 } 342 return ~(checksum & 0xffff); 343} 344 345KResult TCPSocket::protocol_bind() 346{ 347 if (has_specific_local_address() && !m_adapter) { 348 m_adapter = NetworkAdapter::from_ipv4_address(local_address()); 349 if (!m_adapter) 350 return KResult(-EADDRNOTAVAIL); 351 } 352 353 return KSuccess; 354} 355 356KResult TCPSocket::protocol_listen() 357{ 358 LOCKER(sockets_by_tuple().lock()); 359 if (sockets_by_tuple().resource().contains(tuple())) 360 return KResult(-EADDRINUSE); 361 sockets_by_tuple().resource().set(tuple(), this); 362 set_direction(Direction::Passive); 363 set_state(State::Listen); 364 set_setup_state(SetupState::Completed); 365 return KSuccess; 366} 367 368KResult TCPSocket::protocol_connect(FileDescription& description, ShouldBlock should_block) 369{ 370 auto routing_decision = route_to(peer_address(), local_address()); 371 if (routing_decision.is_zero()) 372 return KResult(-EHOSTUNREACH); 373 if (!has_specific_local_address()) 374 set_local_address(routing_decision.adapter->ipv4_address()); 375 376 allocate_local_port_if_needed(); 377 378 m_sequence_number = get_good_random<u32>(); 379 m_ack_number = 0; 380 381 set_setup_state(SetupState::InProgress); 382 send_tcp_packet(TCPFlags::SYN); 383 m_state = State::SynSent; 384 m_role = Role::Connecting; 385 m_direction = Direction::Outgoing; 386 387 if (should_block == ShouldBlock::Yes) { 388 
if (Thread::current->block<Thread::ConnectBlocker>(description) != Thread::BlockResult::WokeNormally) 389 return KResult(-EINTR); 390 ASSERT(setup_state() == SetupState::Completed); 391 if (has_error()) { 392 m_role = Role::None; 393 return KResult(-ECONNREFUSED); 394 } 395 return KSuccess; 396 } 397 398 return KResult(-EINPROGRESS); 399} 400 401int TCPSocket::protocol_allocate_local_port() 402{ 403 static const u16 first_ephemeral_port = 32768; 404 static const u16 last_ephemeral_port = 60999; 405 static const u16 ephemeral_port_range_size = last_ephemeral_port - first_ephemeral_port; 406 u16 first_scan_port = first_ephemeral_port + get_good_random<u16>() % ephemeral_port_range_size; 407 408 LOCKER(sockets_by_tuple().lock()); 409 for (u16 port = first_scan_port;;) { 410 IPv4SocketTuple proposed_tuple(local_address(), port, peer_address(), peer_port()); 411 412 auto it = sockets_by_tuple().resource().find(proposed_tuple); 413 if (it == sockets_by_tuple().resource().end()) { 414 set_local_port(port); 415 sockets_by_tuple().resource().set(proposed_tuple, this); 416 return port; 417 } 418 ++port; 419 if (port > last_ephemeral_port) 420 port = first_ephemeral_port; 421 if (port == first_scan_port) 422 break; 423 } 424 return -EADDRINUSE; 425} 426 427bool TCPSocket::protocol_is_disconnected() const 428{ 429 switch (m_state) { 430 case State::Closed: 431 case State::CloseWait: 432 case State::LastAck: 433 case State::FinWait1: 434 case State::FinWait2: 435 case State::Closing: 436 case State::TimeWait: 437 return true; 438 default: 439 return false; 440 } 441} 442 443void TCPSocket::shut_down_for_writing() 444{ 445 if (state() == State::Established) { 446#ifdef TCP_SOCKET_DEBUG 447 dbg() << " Sending FIN/ACK from Established and moving into FinWait1"; 448#endif 449 send_tcp_packet(TCPFlags::FIN | TCPFlags::ACK); 450 set_state(State::FinWait1); 451 } else { 452 dbg() << " Shutting down TCPSocket for writing but not moving to FinWait1 since state is " << 
to_string(state()); 453 } 454} 455 456void TCPSocket::close() 457{ 458 IPv4Socket::close(); 459 if (state() == State::CloseWait) { 460#ifdef TCP_SOCKET_DEBUG 461 dbg() << " Sending FIN from CloseWait and moving into LastAck"; 462#endif 463 send_tcp_packet(TCPFlags::FIN | TCPFlags::ACK); 464 set_state(State::LastAck); 465 } 466 467 LOCKER(closing_sockets().lock()); 468 closing_sockets().resource().set(tuple(), *this); 469} 470 471}