Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp/dccp: fix ireq->opt races

syzkaller found another bug in the DCCP/TCP stacks [1].

For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix
ireq->pktopts race"), we need to make sure we do not access
ireq->opt unless we own the request sock.

Note that the opt field is renamed to ireq_opt, which makes its users easier to find with grep.

[1]
BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295

CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:16 [inline]
dump_stack+0x194/0x257 lib/dump_stack.c:52
print_address_description+0x73/0x250 mm/kasan/report.c:252
kasan_report_error mm/kasan/report.c:351 [inline]
kasan_report+0x25b/0x340 mm/kasan/report.c:409
__asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427
ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135
tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587
tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557
__tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072
tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline]
tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071
tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816
tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682
ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
NF_HOOK include/linux/netfilter.h:249 [inline]
ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
dst_input include/net/dst.h:464 [inline]
ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
NF_HOOK include/linux/netfilter.h:249 [inline]
ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
__netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
__netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
netif_receive_skb+0xae/0x390 net/core/dev.c:4611
tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
call_write_iter include/linux/fs.h:1770 [inline]
new_sync_write fs/read_write.c:468 [inline]
__vfs_write+0x68a/0x970 fs/read_write.c:481
vfs_write+0x18f/0x510 fs/read_write.c:543
SYSC_write fs/read_write.c:588 [inline]
SyS_write+0xef/0x220 fs/read_write.c:580
entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x40c341
RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341
RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015
RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000
R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1
R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000

Allocated by task 3295:
save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
save_stack+0x43/0xd0 mm/kasan/kasan.c:447
set_track mm/kasan/kasan.c:459 [inline]
kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
__do_kmalloc mm/slab.c:3725 [inline]
__kmalloc+0x162/0x760 mm/slab.c:3734
kmalloc include/linux/slab.h:498 [inline]
tcp_v4_save_options include/net/tcp.h:1962 [inline]
tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271
tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283
tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313
tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857
tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482
tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711
ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
NF_HOOK include/linux/netfilter.h:249 [inline]
ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
dst_input include/net/dst.h:464 [inline]
ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
NF_HOOK include/linux/netfilter.h:249 [inline]
ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
__netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
__netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
netif_receive_skb+0xae/0x390 net/core/dev.c:4611
tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
call_write_iter include/linux/fs.h:1770 [inline]
new_sync_write fs/read_write.c:468 [inline]
__vfs_write+0x68a/0x970 fs/read_write.c:481
vfs_write+0x18f/0x510 fs/read_write.c:543
SYSC_write fs/read_write.c:588 [inline]
SyS_write+0xef/0x220 fs/read_write.c:580
entry_SYSCALL_64_fastpath+0x1f/0xbe

Freed by task 3306:
save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
save_stack+0x43/0xd0 mm/kasan/kasan.c:447
set_track mm/kasan/kasan.c:459 [inline]
kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
__cache_free mm/slab.c:3503 [inline]
kfree+0xca/0x250 mm/slab.c:3820
inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157
__sk_destruct+0xfd/0x910 net/core/sock.c:1560
sk_destruct+0x47/0x80 net/core/sock.c:1595
__sk_free+0x57/0x230 net/core/sock.c:1603
sk_free+0x2a/0x40 net/core/sock.c:1614
sock_put include/net/sock.h:1652 [inline]
inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959
tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765
tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675
ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
NF_HOOK include/linux/netfilter.h:249 [inline]
ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
dst_input include/net/dst.h:464 [inline]
ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
NF_HOOK include/linux/netfilter.h:249 [inline]
ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
__netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
__netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
netif_receive_skb+0xae/0x390 net/core/dev.c:4611
tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
call_write_iter include/linux/fs.h:1770 [inline]
new_sync_write fs/read_write.c:468 [inline]
__vfs_write+0x68a/0x970 fs/read_write.c:481
vfs_write+0x18f/0x510 fs/read_write.c:543
SYSC_write fs/read_write.c:588 [inline]
SyS_write+0xef/0x220 fs/read_write.c:580
entry_SYSCALL_64_fastpath+0x1f/0xbe

Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Eric Dumazet and committed by David S. Miller.
c92e8c02 e95c6cf4

+34 -39
+1 -1
include/net/inet_sock.h
··· 96 96 kmemcheck_bitfield_end(flags); 97 97 u32 ir_mark; 98 98 union { 99 - struct ip_options_rcu *opt; 99 + struct ip_options_rcu __rcu *ireq_opt; 100 100 #if IS_ENABLED(CONFIG_IPV6) 101 101 struct { 102 102 struct ipv6_txoptions *ipv6_opt;
+8 -5
net/dccp/ipv4.c
··· 414 414 sk_daddr_set(newsk, ireq->ir_rmt_addr); 415 415 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); 416 416 newinet->inet_saddr = ireq->ir_loc_addr; 417 - newinet->inet_opt = ireq->opt; 418 - ireq->opt = NULL; 417 + RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); 419 418 newinet->mc_index = inet_iif(skb); 420 419 newinet->mc_ttl = ip_hdr(skb)->ttl; 421 420 newinet->inet_id = jiffies; ··· 429 430 if (__inet_inherit_port(sk, newsk) < 0) 430 431 goto put_and_exit; 431 432 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); 432 - 433 + if (*own_req) 434 + ireq->ireq_opt = NULL; 435 + else 436 + newinet->inet_opt = NULL; 433 437 return newsk; 434 438 435 439 exit_overflow: ··· 443 441 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); 444 442 return NULL; 445 443 put_and_exit: 444 + newinet->inet_opt = NULL; 446 445 inet_csk_prepare_forced_close(newsk); 447 446 dccp_done(newsk); 448 447 goto exit; ··· 495 492 ireq->ir_rmt_addr); 496 493 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 497 494 ireq->ir_rmt_addr, 498 - ireq->opt); 495 + rcu_dereference(ireq->ireq_opt)); 499 496 err = net_xmit_eval(err); 500 497 } 501 498 ··· 551 548 static void dccp_v4_reqsk_destructor(struct request_sock *req) 552 549 { 553 550 dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); 554 - kfree(inet_rsk(req)->opt); 551 + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); 555 552 } 556 553 557 554 void dccp_syn_ack_timeout(const struct request_sock *req)
+7 -17
net/ipv4/cipso_ipv4.c
··· 1951 1951 buf = NULL; 1952 1952 1953 1953 req_inet = inet_rsk(req); 1954 - opt = xchg(&req_inet->opt, opt); 1954 + opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); 1955 1955 if (opt) 1956 1956 kfree_rcu(opt, rcu); 1957 1957 ··· 1973 1973 * values on failure. 1974 1974 * 1975 1975 */ 1976 - static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) 1976 + static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) 1977 1977 { 1978 + struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); 1978 1979 int hdr_delta = 0; 1979 - struct ip_options_rcu *opt = *opt_ptr; 1980 1980 1981 + if (!opt || opt->opt.cipso == 0) 1982 + return 0; 1981 1983 if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { 1982 1984 u8 cipso_len; 1983 1985 u8 cipso_off; ··· 2041 2039 */ 2042 2040 void cipso_v4_sock_delattr(struct sock *sk) 2043 2041 { 2044 - int hdr_delta; 2045 - struct ip_options_rcu *opt; 2046 2042 struct inet_sock *sk_inet; 2043 + int hdr_delta; 2047 2044 2048 2045 sk_inet = inet_sk(sk); 2049 - opt = rcu_dereference_protected(sk_inet->inet_opt, 1); 2050 - if (!opt || opt->opt.cipso == 0) 2051 - return; 2052 2046 2053 2047 hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); 2054 2048 if (sk_inet->is_icsk && hdr_delta > 0) { ··· 2064 2066 */ 2065 2067 void cipso_v4_req_delattr(struct request_sock *req) 2066 2068 { 2067 - struct ip_options_rcu *opt; 2068 - struct inet_request_sock *req_inet; 2069 - 2070 - req_inet = inet_rsk(req); 2071 - opt = req_inet->opt; 2072 - if (!opt || opt->opt.cipso == 0) 2073 - return; 2074 - 2075 - cipso_v4_delopt(&req_inet->opt); 2069 + cipso_v4_delopt(&inet_rsk(req)->ireq_opt); 2076 2070 } 2077 2071 2078 2072 /**
+3 -5
net/ipv4/inet_connection_sock.c
··· 540 540 { 541 541 const struct inet_request_sock *ireq = inet_rsk(req); 542 542 struct net *net = read_pnet(&ireq->ireq_net); 543 - struct ip_options_rcu *opt = ireq->opt; 543 + struct ip_options_rcu *opt; 544 544 struct rtable *rt; 545 545 546 + opt = rcu_dereference(ireq->ireq_opt); 546 547 flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 547 548 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 548 549 sk->sk_protocol, inet_sk_flowi_flags(sk), ··· 577 576 struct flowi4 *fl4; 578 577 struct rtable *rt; 579 578 579 + opt = rcu_dereference(ireq->ireq_opt); 580 580 fl4 = &newinet->cork.fl.u.ip4; 581 581 582 - rcu_read_lock(); 583 - opt = rcu_dereference(newinet->inet_opt); 584 582 flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 585 583 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 586 584 sk->sk_protocol, inet_sk_flowi_flags(sk), ··· 592 592 goto no_route; 593 593 if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 594 594 goto route_err; 595 - rcu_read_unlock(); 596 595 return &rt->dst; 597 596 598 597 route_err: 599 598 ip_rt_put(rt); 600 599 no_route: 601 - rcu_read_unlock(); 602 600 __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 603 601 return NULL; 604 602 }
+1 -1
net/ipv4/syncookies.c
··· 355 355 /* We throwed the options of the initial SYN away, so we hope 356 356 * the ACK carries the same options again (see RFC1122 4.2.3.8) 357 357 */ 358 - ireq->opt = tcp_v4_save_options(sock_net(sk), skb); 358 + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb)); 359 359 360 360 if (security_inet_conn_request(sk, skb, req)) { 361 361 reqsk_free(req);
+1 -1
net/ipv4/tcp_input.c
··· 6196 6196 struct inet_request_sock *ireq = inet_rsk(req); 6197 6197 6198 6198 kmemcheck_annotate_bitfield(ireq, flags); 6199 - ireq->opt = NULL; 6199 + ireq->ireq_opt = NULL; 6200 6200 #if IS_ENABLED(CONFIG_IPV6) 6201 6201 ireq->pktopts = NULL; 6202 6202 #endif
+13 -9
net/ipv4/tcp_ipv4.c
··· 877 877 878 878 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 879 879 ireq->ir_rmt_addr, 880 - ireq->opt); 880 + rcu_dereference(ireq->ireq_opt)); 881 881 err = net_xmit_eval(err); 882 882 } 883 883 ··· 889 889 */ 890 890 static void tcp_v4_reqsk_destructor(struct request_sock *req) 891 891 { 892 - kfree(inet_rsk(req)->opt); 892 + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); 893 893 } 894 894 895 895 #ifdef CONFIG_TCP_MD5SIG ··· 1265 1265 struct sk_buff *skb) 1266 1266 { 1267 1267 struct inet_request_sock *ireq = inet_rsk(req); 1268 + struct net *net = sock_net(sk_listener); 1268 1269 1269 1270 sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); 1270 1271 sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); 1271 - ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb); 1272 + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); 1272 1273 } 1273 1274 1274 1275 static struct dst_entry *tcp_v4_route_req(const struct sock *sk, ··· 1356 1355 sk_daddr_set(newsk, ireq->ir_rmt_addr); 1357 1356 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); 1358 1357 newsk->sk_bound_dev_if = ireq->ir_iif; 1359 - newinet->inet_saddr = ireq->ir_loc_addr; 1360 - inet_opt = ireq->opt; 1361 - rcu_assign_pointer(newinet->inet_opt, inet_opt); 1362 - ireq->opt = NULL; 1358 + newinet->inet_saddr = ireq->ir_loc_addr; 1359 + inet_opt = rcu_dereference(ireq->ireq_opt); 1360 + RCU_INIT_POINTER(newinet->inet_opt, inet_opt); 1363 1361 newinet->mc_index = inet_iif(skb); 1364 1362 newinet->mc_ttl = ip_hdr(skb)->ttl; 1365 1363 newinet->rcv_tos = ip_hdr(skb)->tos; ··· 1403 1403 if (__inet_inherit_port(sk, newsk) < 0) 1404 1404 goto put_and_exit; 1405 1405 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); 1406 - if (*own_req) 1406 + if (likely(*own_req)) { 1407 1407 tcp_move_syn(newtp, req); 1408 - 1408 + ireq->ireq_opt = NULL; 1409 + } else { 1410 + newinet->inet_opt = NULL; 1411 + } 1409 1412 return newsk; 1410 1413 1411 1414 exit_overflow: ··· 1419 
1416 tcp_listendrop(sk); 1420 1417 return NULL; 1421 1418 put_and_exit: 1419 + newinet->inet_opt = NULL; 1422 1420 inet_csk_prepare_forced_close(newsk); 1423 1421 tcp_done(newsk); 1424 1422 goto exit;