Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net-tcp: Fast Open client - sending SYN-data

This patch implements sending SYN-data in tcp_connect(). The data is
from tcp_sendmsg() with flag MSG_FASTOPEN (implemented in a later patch).

The length of the cookie in tcp_fastopen_req, init'd to 0, controls the
type of the SYN. If the cookie is not cached (len==0), the host sends
data-less SYN with Fast Open cookie request option to solicit a cookie
from the remote. If the cookie is available (len > 0), the host sends
a SYN-data with Fast Open cookie option. If cookie length is negative,
the SYN will not include any Fast Open option (for fall back operations).

To deal with middleboxes that may drop a SYN carrying data or an experimental
TCP option, the SYN-data is only sent once. SYN retransmits do not include
data or Fast Open options. The connection will fall back to regular TCP
handshake.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Yuchung Cheng and committed by
David S. Miller
783237e8 1fe4c481

+130 -12
+1
include/linux/snmp.h
··· 238 238 LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ 239 239 LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ 240 240 LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */ 241 + LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */ 241 242 __LINUX_MIB_MAX 242 243 }; 243 244
+5 -1
include/linux/tcp.h
··· 386 386 unused : 1; 387 387 u8 repair_queue; 388 388 u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ 389 - early_retrans_delayed:1; /* Delayed ER timer installed */ 389 + early_retrans_delayed:1, /* Delayed ER timer installed */ 390 + syn_fastopen:1; /* SYN includes Fast Open option */ 390 391 391 392 /* RTT measurement */ 392 393 u32 srtt; /* smoothed round trip time << 3 */ ··· 500 499 /* TCP MD5 Signature Option information */ 501 500 struct tcp_md5sig_info __rcu *md5sig_info; 502 501 #endif 502 + 503 + /* TCP fastopen related information */ 504 + struct tcp_fastopen_request *fastopen_req; 503 505 504 506 /* When the cookie options are generated and exchanged, then this 505 507 * object holds a reference to them (cookie_values->kref). Also
+9
include/net/tcp.h
··· 1289 1289 extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, 1290 1290 const struct tcp_md5sig_key *key); 1291 1291 1292 + struct tcp_fastopen_request { 1293 + /* Fast Open cookie. Size 0 means a cookie request */ 1294 + struct tcp_fastopen_cookie cookie; 1295 + struct msghdr *data; /* data in MSG_FASTOPEN */ 1296 + u16 copied; /* queued in tcp_connect() */ 1297 + }; 1298 + 1299 + void tcp_free_fastopen_req(struct tcp_sock *tp); 1300 + 1292 1301 /* write queue abstraction */ 1293 1302 static inline void tcp_write_queue_purge(struct sock *sk) 1294 1303 {
+8 -2
net/ipv4/af_inet.c
··· 556 556 } 557 557 EXPORT_SYMBOL(inet_dgram_connect); 558 558 559 - static long inet_wait_for_connect(struct sock *sk, long timeo) 559 + static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) 560 560 { 561 561 DEFINE_WAIT(wait); 562 562 563 563 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 564 + sk->sk_write_pending += writebias; 564 565 565 566 /* Basic assumption: if someone sets sk->sk_err, he _must_ 566 567 * change state of the socket from TCP_SYN_*. ··· 577 576 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 578 577 } 579 578 finish_wait(sk_sleep(sk), &wait); 579 + sk->sk_write_pending -= writebias; 580 580 return timeo; 581 581 } 582 582 ··· 636 634 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 637 635 638 636 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 637 + int writebias = (sk->sk_protocol == IPPROTO_TCP) && 638 + tcp_sk(sk)->fastopen_req && 639 + tcp_sk(sk)->fastopen_req->data ? 1 : 0; 640 + 639 641 /* Error code is set above */ 640 - if (!timeo || !inet_wait_for_connect(sk, timeo)) 642 + if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) 641 643 goto out; 642 644 643 645 err = sock_intr_errno(timeo);
+1
net/ipv4/proc.c
··· 262 262 SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE), 263 263 SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), 264 264 SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), 265 + SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), 265 266 SNMP_MIB_SENTINEL 266 267 }; 267 268
+106 -9
net/ipv4/tcp_output.c
··· 596 596 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? 597 597 tcp_cookie_size_check(cvp->cookie_desired) : 598 598 0; 599 + struct tcp_fastopen_request *fastopen = tp->fastopen_req; 599 600 600 601 #ifdef CONFIG_TCP_MD5SIG 601 602 *md5 = tp->af_specific->md5_lookup(sk, sk); ··· 637 636 remaining -= TCPOLEN_SACKPERM_ALIGNED; 638 637 } 639 638 639 + if (fastopen && fastopen->cookie.len >= 0) { 640 + u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len; 641 + need = (need + 3) & ~3U; /* Align to 32 bits */ 642 + if (remaining >= need) { 643 + opts->options |= OPTION_FAST_OPEN_COOKIE; 644 + opts->fastopen_cookie = &fastopen->cookie; 645 + remaining -= need; 646 + tp->syn_fastopen = 1; 647 + } 648 + } 640 649 /* Note that timestamps are required by the specification. 641 650 * 642 651 * Odd numbers of bytes are prohibited by the specification, ensuring ··· 2835 2824 tcp_clear_retrans(tp); 2836 2825 } 2837 2826 2827 + static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) 2828 + { 2829 + struct tcp_sock *tp = tcp_sk(sk); 2830 + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); 2831 + 2832 + tcb->end_seq += skb->len; 2833 + skb_header_release(skb); 2834 + __tcp_add_write_queue_tail(sk, skb); 2835 + sk->sk_wmem_queued += skb->truesize; 2836 + sk_mem_charge(sk, skb->truesize); 2837 + tp->write_seq = tcb->end_seq; 2838 + tp->packets_out += tcp_skb_pcount(skb); 2839 + } 2840 + 2841 + /* Build and send a SYN with data and (cached) Fast Open cookie. However, 2842 + * queue a data-only packet after the regular SYN, such that regular SYNs 2843 + * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges 2844 + * only the SYN sequence, the data are retransmitted in the first ACK. 2845 + * If cookie is not cached or other error occurs, falls back to send a 2846 + * regular SYN with Fast Open cookie request option. 
2847 + */ 2848 + static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) 2849 + { 2850 + struct tcp_sock *tp = tcp_sk(sk); 2851 + struct tcp_fastopen_request *fo = tp->fastopen_req; 2852 + int space, i, err = 0, iovlen = fo->data->msg_iovlen; 2853 + struct sk_buff *syn_data = NULL, *data; 2854 + 2855 + tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie); 2856 + if (fo->cookie.len <= 0) 2857 + goto fallback; 2858 + 2859 + /* MSS for SYN-data is based on cached MSS and bounded by PMTU and 2860 + * user-MSS. Reserve maximum option space for middleboxes that add 2861 + * private TCP options. The cost is reduced data space in SYN :( 2862 + */ 2863 + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp) 2864 + tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; 2865 + space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - 2866 + MAX_TCP_OPTION_SPACE; 2867 + 2868 + syn_data = skb_copy_expand(syn, skb_headroom(syn), space, 2869 + sk->sk_allocation); 2870 + if (syn_data == NULL) 2871 + goto fallback; 2872 + 2873 + for (i = 0; i < iovlen && syn_data->len < space; ++i) { 2874 + struct iovec *iov = &fo->data->msg_iov[i]; 2875 + unsigned char __user *from = iov->iov_base; 2876 + int len = iov->iov_len; 2877 + 2878 + if (syn_data->len + len > space) 2879 + len = space - syn_data->len; 2880 + else if (i + 1 == iovlen) 2881 + /* No more data pending in inet_wait_for_connect() */ 2882 + fo->data = NULL; 2883 + 2884 + if (skb_add_data(syn_data, from, len)) 2885 + goto fallback; 2886 + } 2887 + 2888 + /* Queue a data-only packet after the regular SYN for retransmission */ 2889 + data = pskb_copy(syn_data, sk->sk_allocation); 2890 + if (data == NULL) 2891 + goto fallback; 2892 + TCP_SKB_CB(data)->seq++; 2893 + TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; 2894 + TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); 2895 + tcp_connect_queue_skb(sk, data); 2896 + fo->copied = data->len; 2897 + 2898 + if (tcp_transmit_skb(sk, syn_data, 0, 
sk->sk_allocation) == 0) { 2899 + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); 2900 + goto done; 2901 + } 2902 + syn_data = NULL; 2903 + 2904 + fallback: 2905 + /* Send a regular SYN with Fast Open cookie request option */ 2906 + if (fo->cookie.len > 0) 2907 + fo->cookie.len = 0; 2908 + err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); 2909 + if (err) 2910 + tp->syn_fastopen = 0; 2911 + kfree_skb(syn_data); 2912 + done: 2913 + fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ 2914 + return err; 2915 + } 2916 + 2838 2917 /* Build a SYN and send it off. */ 2839 2918 int tcp_connect(struct sock *sk) 2840 2919 { ··· 2942 2841 skb_reserve(buff, MAX_TCP_HEADER); 2943 2842 2944 2843 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 2844 + tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; 2845 + tcp_connect_queue_skb(sk, buff); 2945 2846 TCP_ECN_send_syn(sk, buff); 2946 2847 2947 - /* Send it off. */ 2948 - TCP_SKB_CB(buff)->when = tcp_time_stamp; 2949 - tp->retrans_stamp = TCP_SKB_CB(buff)->when; 2950 - skb_header_release(buff); 2951 - __tcp_add_write_queue_tail(sk, buff); 2952 - sk->sk_wmem_queued += buff->truesize; 2953 - sk_mem_charge(sk, buff->truesize); 2954 - tp->packets_out += tcp_skb_pcount(buff); 2955 - err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); 2848 + /* Send off SYN; include data in Fast Open. */ 2849 + err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : 2850 + tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); 2956 2851 if (err == -ECONNREFUSED) 2957 2852 return err; 2958 2853