Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)

sendmsg() (or sendto()) with MSG_FASTOPEN is a combo of connect(2)
and write(2). The application should replace connect() with it to
send data in the opening SYN packet.

For a blocking socket, sendmsg() blocks until all the data are buffered
locally and the handshake is completed, like a connect() call. It
returns an errno similar to that of connect() if the TCP handshake fails.

For a non-blocking socket, it returns the number of bytes queued (and
transmitted in the SYN-data packet) if a cookie is available. If a cookie
is not available, it transmits a data-less SYN packet with a Fast Open
cookie request option and returns -EINPROGRESS like connect().

Using MSG_FASTOPEN on a connecting or connected socket will result in
an errno similar to that of repeated connect() calls. Therefore the
application should only use this flag on new sockets.

The buffer size of sendmsg() is independent of the MSS of the connection.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Yuchung Cheng and committed by
David S. Miller
cf60af03 8e4178c1

+92 -12
+11
Documentation/networking/ip-sysctl.txt
··· 468 468 SYN flood warnings in logs not being really flooded, your server 469 469 is seriously misconfigured. 470 470 471 + tcp_fastopen - INTEGER 472 + Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data 473 + in the opening SYN packet. To use this feature, the client application 474 + must not use connect(). Instead, it should use sendmsg() or sendto() 475 + with MSG_FASTOPEN flag which performs a TCP handshake automatically. 476 + 477 + The values (bitmap) are: 478 + 1: Enables sending data in the opening SYN on the client 479 + 480 + Default: 0 481 + 471 482 tcp_syn_retries - INTEGER 472 483 Number of times initial SYNs for an active TCP connection attempt 473 484 will be retransmitted. Should not be higher than 255. Default value
+1
include/linux/socket.h
··· 268 268 #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ 269 269 #define MSG_EOF MSG_FIN 270 270 271 + #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ 271 272 #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file 272 273 descriptor received through 273 274 SCM_RIGHTS */
+4 -2
include/net/inet_common.h
··· 14 14 struct socket; 15 15 16 16 extern int inet_release(struct socket *sock); 17 - extern int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, 17 + extern int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, 18 18 int addr_len, int flags); 19 - extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, 19 + extern int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, 20 + int addr_len, int flags); 21 + extern int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, 20 22 int addr_len, int flags); 21 23 extern int inet_accept(struct socket *sock, struct socket *newsock, int flags); 22 24 extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock,
+3
include/net/tcp.h
··· 212 212 /* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */ 213 213 #define TCP_INIT_CWND 10 214 214 215 + /* Bit Flags for sysctl_tcp_fastopen */ 216 + #define TFO_CLIENT_ENABLE 1 217 + 215 218 extern struct inet_timewait_death_row tcp_death_row; 216 219 217 220 /* sysctl variables for tcp */
+14 -5
net/ipv4/af_inet.c
··· 585 585 * Connect to a remote host. There is regrettably still a little 586 586 * TCP 'magic' in here. 587 587 */ 588 - int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, 589 - int addr_len, int flags) 588 + int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, 589 + int addr_len, int flags) 590 590 { 591 591 struct sock *sk = sock->sk; 592 592 int err; ··· 594 594 595 595 if (addr_len < sizeof(uaddr->sa_family)) 596 596 return -EINVAL; 597 - 598 - lock_sock(sk); 599 597 600 598 if (uaddr->sa_family == AF_UNSPEC) { 601 599 err = sk->sk_prot->disconnect(sk, flags); ··· 661 663 sock->state = SS_CONNECTED; 662 664 err = 0; 663 665 out: 664 - release_sock(sk); 665 666 return err; 666 667 667 668 sock_error: ··· 669 672 if (sk->sk_prot->disconnect(sk, flags)) 670 673 sock->state = SS_DISCONNECTING; 671 674 goto out; 675 + } 676 + EXPORT_SYMBOL(__inet_stream_connect); 677 + 678 + int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, 679 + int addr_len, int flags) 680 + { 681 + int err; 682 + 683 + lock_sock(sock->sk); 684 + err = __inet_stream_connect(sock, uaddr, addr_len, flags); 685 + release_sock(sock->sk); 686 + return err; 672 687 } 673 688 EXPORT_SYMBOL(inet_stream_connect); 674 689
+56 -5
net/ipv4/tcp.c
··· 270 270 #include <linux/slab.h> 271 271 272 272 #include <net/icmp.h> 273 + #include <net/inet_common.h> 273 274 #include <net/tcp.h> 274 275 #include <net/xfrm.h> 275 276 #include <net/ip.h> ··· 983 982 return tmp; 984 983 } 985 984 985 + void tcp_free_fastopen_req(struct tcp_sock *tp) 986 + { 987 + if (tp->fastopen_req != NULL) { 988 + kfree(tp->fastopen_req); 989 + tp->fastopen_req = NULL; 990 + } 991 + } 992 + 993 + static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) 994 + { 995 + struct tcp_sock *tp = tcp_sk(sk); 996 + int err, flags; 997 + 998 + if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) 999 + return -EOPNOTSUPP; 1000 + if (tp->fastopen_req != NULL) 1001 + return -EALREADY; /* Another Fast Open is in progress */ 1002 + 1003 + tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), 1004 + sk->sk_allocation); 1005 + if (unlikely(tp->fastopen_req == NULL)) 1006 + return -ENOBUFS; 1007 + tp->fastopen_req->data = msg; 1008 + 1009 + flags = (msg->msg_flags & MSG_DONTWAIT) ? 
O_NONBLOCK : 0; 1010 + err = __inet_stream_connect(sk->sk_socket, msg->msg_name, 1011 + msg->msg_namelen, flags); 1012 + *size = tp->fastopen_req->copied; 1013 + tcp_free_fastopen_req(tp); 1014 + return err; 1015 + } 1016 + 986 1017 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 987 1018 size_t size) 988 1019 { 989 1020 struct iovec *iov; 990 1021 struct tcp_sock *tp = tcp_sk(sk); 991 1022 struct sk_buff *skb; 992 - int iovlen, flags, err, copied; 993 - int mss_now = 0, size_goal; 1023 + int iovlen, flags, err, copied = 0; 1024 + int mss_now = 0, size_goal, copied_syn = 0, offset = 0; 994 1025 bool sg; 995 1026 long timeo; 996 1027 997 1028 lock_sock(sk); 998 1029 999 1030 flags = msg->msg_flags; 1031 + if (flags & MSG_FASTOPEN) { 1032 + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); 1033 + if (err == -EINPROGRESS && copied_syn > 0) 1034 + goto out; 1035 + else if (err) 1036 + goto out_err; 1037 + offset = copied_syn; 1038 + } 1039 + 1000 1040 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1001 1041 1002 1042 /* Wait for a connection to finish. 
*/ 1003 1043 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) 1004 1044 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) 1005 - goto out_err; 1045 + goto do_error; 1006 1046 1007 1047 if (unlikely(tp->repair)) { 1008 1048 if (tp->repair_queue == TCP_RECV_QUEUE) { ··· 1079 1037 unsigned char __user *from = iov->iov_base; 1080 1038 1081 1039 iov++; 1040 + if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ 1041 + if (offset >= seglen) { 1042 + offset -= seglen; 1043 + continue; 1044 + } 1045 + seglen -= offset; 1046 + from += offset; 1047 + offset = 0; 1048 + } 1082 1049 1083 1050 while (seglen > 0) { 1084 1051 int copy = 0; ··· 1250 1199 if (copied && likely(!tp->repair)) 1251 1200 tcp_push(sk, flags, mss_now, tp->nonagle); 1252 1201 release_sock(sk); 1253 - return copied; 1202 + return copied + copied_syn; 1254 1203 1255 1204 do_fault: 1256 1205 if (!skb->len) { ··· 1263 1212 } 1264 1213 1265 1214 do_error: 1266 - if (copied) 1215 + if (copied + copied_syn) 1267 1216 goto out; 1268 1217 out_err: 1269 1218 err = sk_stream_error(sk, flags, err);
+3
net/ipv4/tcp_ipv4.c
··· 1952 1952 tp->cookie_values = NULL; 1953 1953 } 1954 1954 1955 + /* If socket is aborted during connect operation */ 1956 + tcp_free_fastopen_req(tp); 1957 + 1955 1958 sk_sockets_allocated_dec(sk); 1956 1959 sock_release_memcg(sk); 1957 1960 }