Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net-tcp: Fast Open base

This patch implements the common code for both the client and server.

1. TCP Fast Open option processing. Since Fast Open does not have an
option number assigned by IANA yet, it shares the experimental option
code 254 by implementing draft-ietf-tcpm-experimental-options
with a 16-bit magic number 0xF989. This enables global experiments
without clashing over the scarce (2) experimental options available for TCP.

When the draft status becomes standard (maybe), the client should
switch to the newly assigned option number while the server supports
both numbers for the transition.

2. The new sysctl tcp_fastopen

3. A placeholder init function

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Yuchung Cheng and committed by
David S. Miller
2100c8d2 4cce66cd

+86 -16
+10
include/linux/tcp.h
··· 243 243 return (tcp_hdr(skb)->doff - 5) * 4; 244 244 } 245 245 246 + /* TCP Fast Open */ 247 + #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ 248 + #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ 249 + 250 + /* TCP Fast Open Cookie as stored in memory */ 251 + struct tcp_fastopen_cookie { 252 + s8 len; 253 + u8 val[TCP_FASTOPEN_COOKIE_MAX]; 254 + }; 255 + 246 256 /* This defines a selective acknowledgement block. */ 247 257 struct tcp_sack_block_wire { 248 258 __be32 start_seq;
+8 -1
include/net/tcp.h
··· 170 170 #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ 171 171 #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ 172 172 #define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ 173 + #define TCPOPT_EXP 254 /* Experimental */ 174 + /* Magic number to be after the option value for sharing TCP 175 + * experimental options. See draft-ietf-tcpm-experimental-options-00.txt 176 + */ 177 + #define TCPOPT_FASTOPEN_MAGIC 0xF989 173 178 174 179 /* 175 180 * TCP option lengths ··· 185 180 #define TCPOLEN_SACK_PERM 2 186 181 #define TCPOLEN_TIMESTAMP 10 187 182 #define TCPOLEN_MD5SIG 18 183 + #define TCPOLEN_EXP_FASTOPEN_BASE 4 188 184 #define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ 189 185 #define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ 190 186 #define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) ··· 228 222 extern int sysctl_tcp_retries2; 229 223 extern int sysctl_tcp_orphan_retries; 230 224 extern int sysctl_tcp_syncookies; 225 + extern int sysctl_tcp_fastopen; 231 226 extern int sysctl_tcp_retrans_collapse; 232 227 extern int sysctl_tcp_stdurg; 233 228 extern int sysctl_tcp_rfc1337; ··· 425 418 size_t len, int nonblock, int flags, int *addr_len); 426 419 extern void tcp_parse_options(const struct sk_buff *skb, 427 420 struct tcp_options_received *opt_rx, const u8 **hvpp, 428 - int estab); 421 + int estab, struct tcp_fastopen_cookie *foc); 429 422 extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); 430 423 431 424 /*
+1 -1
net/ipv4/Makefile
··· 7 7 ip_output.o ip_sockglue.o inet_hashtables.o \ 8 8 inet_timewait_sock.o inet_connection_sock.o \ 9 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 10 - tcp_minisocks.o tcp_cong.o tcp_metrics.o \ 10 + tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ 11 11 datagram.o raw.o udp.o udplite.o \ 12 12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 13 13 fib_frontend.o fib_semantics.o fib_trie.o \
+1 -1
net/ipv4/syncookies.c
··· 293 293 294 294 /* check for timestamp cookie support */ 295 295 memset(&tcp_opt, 0, sizeof(tcp_opt)); 296 - tcp_parse_options(skb, &tcp_opt, &hash_location, 0); 296 + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); 297 297 298 298 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) 299 299 goto out;
+7
net/ipv4/sysctl_net_ipv4.c
··· 367 367 }, 368 368 #endif 369 369 { 370 + .procname = "tcp_fastopen", 371 + .data = &sysctl_tcp_fastopen, 372 + .maxlen = sizeof(int), 373 + .mode = 0644, 374 + .proc_handler = proc_dointvec, 375 + }, 376 + { 370 377 .procname = "tcp_tw_recycle", 371 378 .data = &tcp_death_row.sysctl_tw_recycle, 372 379 .maxlen = sizeof(int),
+11
net/ipv4/tcp_fastopen.c
··· 1 + #include <linux/init.h> 2 + #include <linux/kernel.h> 3 + 4 + int sysctl_tcp_fastopen; 5 + 6 + static int __init tcp_fastopen_init(void) 7 + { 8 + return 0; 9 + } 10 + 11 + late_initcall(tcp_fastopen_init);
+22 -4
net/ipv4/tcp_input.c
··· 3732 3732 * the fast version below fails. 3733 3733 */ 3734 3734 void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, 3735 - const u8 **hvpp, int estab) 3735 + const u8 **hvpp, int estab, 3736 + struct tcp_fastopen_cookie *foc) 3736 3737 { 3737 3738 const unsigned char *ptr; 3738 3739 const struct tcphdr *th = tcp_hdr(skb); ··· 3840 3839 break; 3841 3840 } 3842 3841 break; 3843 - } 3844 3842 3843 + case TCPOPT_EXP: 3844 + /* Fast Open option shares code 254 using a 3845 + * 16 bits magic number. It's valid only in 3846 + * SYN or SYN-ACK with an even size. 3847 + */ 3848 + if (opsize < TCPOLEN_EXP_FASTOPEN_BASE || 3849 + get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC || 3850 + foc == NULL || !th->syn || (opsize & 1)) 3851 + break; 3852 + foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE; 3853 + if (foc->len >= TCP_FASTOPEN_COOKIE_MIN && 3854 + foc->len <= TCP_FASTOPEN_COOKIE_MAX) 3855 + memcpy(foc->val, ptr + 2, foc->len); 3856 + else if (foc->len != 0) 3857 + foc->len = -1; 3858 + break; 3859 + 3860 + } 3845 3861 ptr += opsize-2; 3846 3862 length -= opsize; 3847 3863 } ··· 3900 3882 if (tcp_parse_aligned_timestamp(tp, th)) 3901 3883 return true; 3902 3884 } 3903 - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); 3885 + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); 3904 3886 return true; 3905 3887 } 3906 3888 ··· 5655 5637 struct tcp_cookie_values *cvp = tp->cookie_values; 5656 5638 int saved_clamp = tp->rx_opt.mss_clamp; 5657 5639 5658 - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); 5640 + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, NULL); 5659 5641 5660 5642 if (th->ack) { 5661 5643 /* rfc793:
+1 -1
net/ipv4/tcp_ipv4.c
··· 1307 1307 tcp_clear_options(&tmp_opt); 1308 1308 tmp_opt.mss_clamp = TCP_MSS_DEFAULT; 1309 1309 tmp_opt.user_mss = tp->rx_opt.user_mss; 1310 - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); 1310 + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 1311 1311 1312 1312 if (tmp_opt.cookie_plus > 0 && 1313 1313 tmp_opt.saw_tstamp &&
+2 -2
net/ipv4/tcp_minisocks.c
··· 97 97 98 98 tmp_opt.saw_tstamp = 0; 99 99 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { 100 - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); 100 + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 101 101 102 102 if (tmp_opt.saw_tstamp) { 103 103 tmp_opt.ts_recent = tcptw->tw_ts_recent; ··· 534 534 535 535 tmp_opt.saw_tstamp = 0; 536 536 if (th->doff > (sizeof(struct tcphdr)>>2)) { 537 - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); 537 + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 538 538 539 539 if (tmp_opt.saw_tstamp) { 540 540 tmp_opt.ts_recent = req->ts_recent;
+21 -4
net/ipv4/tcp_output.c
··· 385 385 #define OPTION_MD5 (1 << 2) 386 386 #define OPTION_WSCALE (1 << 3) 387 387 #define OPTION_COOKIE_EXTENSION (1 << 4) 388 + #define OPTION_FAST_OPEN_COOKIE (1 << 8) 388 389 389 390 struct tcp_out_options { 390 - u8 options; /* bit field of OPTION_* */ 391 + u16 options; /* bit field of OPTION_* */ 392 + u16 mss; /* 0 to disable */ 391 393 u8 ws; /* window scale, 0 to disable */ 392 394 u8 num_sack_blocks; /* number of SACK blocks to include */ 393 395 u8 hash_size; /* bytes in hash_location */ 394 - u16 mss; /* 0 to disable */ 395 - __u32 tsval, tsecr; /* need to include OPTION_TS */ 396 396 __u8 *hash_location; /* temporary pointer, overloaded */ 397 + __u32 tsval, tsecr; /* need to include OPTION_TS */ 398 + struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ 397 399 }; 398 400 399 401 /* The sysctl int routines are generic, so check consistency here. ··· 444 442 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, 445 443 struct tcp_out_options *opts) 446 444 { 447 - u8 options = opts->options; /* mungable copy */ 445 + u16 options = opts->options; /* mungable copy */ 448 446 449 447 /* Having both authentication and cookies for security is redundant, 450 448 * and there's certainly not enough room. Instead, the cookie-less ··· 565 563 } 566 564 567 565 tp->rx_opt.dsack = 0; 566 + } 567 + 568 + if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { 569 + struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; 570 + 571 + *ptr++ = htonl((TCPOPT_EXP << 24) | 572 + ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) | 573 + TCPOPT_FASTOPEN_MAGIC); 574 + 575 + memcpy(ptr, foc->val, foc->len); 576 + if ((foc->len & 3) == 2) { 577 + u8 *align = ((u8 *)ptr) + foc->len; 578 + align[0] = align[1] = TCPOPT_NOP; 579 + } 580 + ptr += (foc->len + 3) >> 2; 568 581 } 569 582 } 570 583
+1 -1
net/ipv6/syncookies.c
··· 177 177 178 178 /* check for timestamp cookie support */ 179 179 memset(&tcp_opt, 0, sizeof(tcp_opt)); 180 - tcp_parse_options(skb, &tcp_opt, &hash_location, 0); 180 + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); 181 181 182 182 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) 183 183 goto out;
+1 -1
net/ipv6/tcp_ipv6.c
··· 1033 1033 tcp_clear_options(&tmp_opt); 1034 1034 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 1035 1035 tmp_opt.user_mss = tp->rx_opt.user_mss; 1036 - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); 1036 + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 1037 1037 1038 1038 if (tmp_opt.cookie_plus > 0 && 1039 1039 tmp_opt.saw_tstamp &&