Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

net: silence KCSAN warnings around sk_add_backlog() calls

sk_add_backlog() callers usually read sk->sk_rcvbuf without
owning the socket lock. This means the sk_rcvbuf value can
be changed by other cpus, and KCSAN complains.

Add READ_ONCE() annotations to document the lockless nature
of these reads.

Note that writes to sk_rcvbuf should also use WRITE_ONCE(),
but this will be done in separate patches to ease stable
backports (if we decide this is relevant for stable trees).
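
For readers unfamiliar with the annotation, here is a minimal userspace
sketch of the pattern (illustration only, with simplified stand-ins for
the kernel macros; the real definitions in include/linux/compiler.h
handle more cases):

	#include <pthread.h>
	#include <stdio.h>

	/* Simplified stand-ins for the kernel's READ_ONCE()/WRITE_ONCE().
	 * A volatile access forces the compiler to emit exactly one untorn
	 * load/store, instead of refetching, caching or splitting it. */
	#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))
	#define WRITE_ONCE(x, v)	(*(volatile __typeof__(x) *)&(x) = (v))

	static int rcvbuf = 212992;	/* stands in for sk->sk_rcvbuf */
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	/* Writer: holds the lock, but still annotates the store because
	 * lockless readers exist (the WRITE_ONCE() half noted above). */
	static void set_rcvbuf(int val)
	{
		pthread_mutex_lock(&lock);
		WRITE_ONCE(rcvbuf, val);
		pthread_mutex_unlock(&lock);
	}

	/* Reader: intentionally lockless, like the sk_add_backlog()
	 * callers; READ_ONCE() documents the data race for KCSAN and
	 * keeps the load whole. */
	static int backlog_limit(void)
	{
		return READ_ONCE(rcvbuf);
	}

	int main(void)
	{
		set_rcvbuf(425984);
		printf("limit=%d\n", backlog_limit());
		return 0;
	}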

BUG: KCSAN: data-race in tcp_add_backlog / tcp_recvmsg

write to 0xffff88812ab369f8 of 8 bytes by interrupt on cpu 1:
__sk_add_backlog include/net/sock.h:902 [inline]
sk_add_backlog include/net/sock.h:933 [inline]
tcp_add_backlog+0x45a/0xcc0 net/ipv4/tcp_ipv4.c:1737
tcp_v4_rcv+0x1aba/0x1bf0 net/ipv4/tcp_ipv4.c:1925
ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204
ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
NF_HOOK include/linux/netfilter.h:305 [inline]
NF_HOOK include/linux/netfilter.h:299 [inline]
ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
dst_input include/net/dst.h:442 [inline]
ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
NF_HOOK include/linux/netfilter.h:305 [inline]
NF_HOOK include/linux/netfilter.h:299 [inline]
ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
__netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004
__netif_receive_skb+0x37/0xf0 net/core/dev.c:5118
netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208
napi_skb_finish net/core/dev.c:5671 [inline]
napi_gro_receive+0x28f/0x330 net/core/dev.c:5704
receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061
virtnet_receive drivers/net/virtio_net.c:1323 [inline]
virtnet_poll+0x436/0x7d0 drivers/net/virtio_net.c:1428
napi_poll net/core/dev.c:6352 [inline]
net_rx_action+0x3ae/0xa50 net/core/dev.c:6418

read to 0xffff88812ab369f8 of 8 bytes by task 7271 on cpu 0:
tcp_recvmsg+0x470/0x1a30 net/ipv4/tcp.c:2047
inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
sock_recvmsg_nosec net/socket.c:871 [inline]
sock_recvmsg net/socket.c:889 [inline]
sock_recvmsg+0x92/0xb0 net/socket.c:885
sock_read_iter+0x15f/0x1e0 net/socket.c:967
call_read_iter include/linux/fs.h:1864 [inline]
new_sync_read+0x389/0x4f0 fs/read_write.c:414
__vfs_read+0xb1/0xc0 fs/read_write.c:427
vfs_read fs/read_write.c:461 [inline]
vfs_read+0x143/0x2c0 fs/read_write.c:446
ksys_read+0xd5/0x1b0 fs/read_write.c:587
__do_sys_read fs/read_write.c:597 [inline]
__se_sys_read fs/read_write.c:595 [inline]
__x64_sys_read+0x4c/0x60 fs/read_write.c:595
do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296
entry_SYSCALL_64_after_hwframe+0x44/0xa9

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 7271 Comm: syz-fuzzer Not tainted 5.3.0+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Authored by Eric Dumazet, committed by Jakub Kicinski
8265792b 1f142c17

 net/core/sock.c     | 2 +-
 net/ipv4/tcp_ipv4.c | 2 +-
 net/llc/llc_conn.c  | 2 +-
 net/sctp/input.c    | 6 +++---
 net/tipc/socket.c   | 6 +++---
 net/x25/x25_dev.c   | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -522,7 +522,7 @@
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
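For context on where that limit lands: a simplified, from-memory sketch
of the v5.3-era helpers in include/net/sock.h (error handling and
memory-accounting details elided), showing why a stale or torn
sk_rcvbuf read would skew the drop decision:

	/* Simplified from include/net/sock.h (v5.3-era, from memory). */
	static inline bool sk_rcvqueues_full(const struct sock *sk,
					     unsigned int limit)
	{
		unsigned int qsize = sk->sk_backlog.len +
				     atomic_read(&sk->sk_rmem_alloc);

		return qsize > limit;	/* limit is the caller's sk_rcvbuf snapshot */
	}

	static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb,
					 unsigned int limit)
	{
		if (sk_rcvqueues_full(sk, limit))
			return -ENOBUFS;	/* callers drop the skb */

		__sk_add_backlog(sk, skb);	/* link skb onto sk->sk_backlog */
		sk->sk_backlog.len += skb->truesize;
		return 0;
	}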
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1644,7 +1644,7 @@
 
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
+	u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
 	struct skb_shared_info *shinfo;
 	const struct tcphdr *th;
 	struct tcphdr *thtail;
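Not part of this patch, but useful context: later in the same function
the combined limit gains a fixed slack before the backlog attempt
(v5.3-era code, quoted from memory and simplified):

	/* Further down in tcp_add_backlog() (v5.3-era, from memory):
	 * allow some headroom on top of rcvbuf + sndbuf, then queue. */
	limit += 64 * 1024;

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;	/* caller frees the skb */
	}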
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -813,7 +813,7 @@
 	else {
 		dprintk("%s: adding to backlog...\n", __func__);
 		llc_set_backlog_type(skb, LLC_PACKET);
-		if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+		if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
 			goto drop_unlock;
 	}
 out:
diff --git a/net/sctp/input.c b/net/sctp/input.c
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -322,7 +322,7 @@
 	bh_lock_sock(sk);
 
 	if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
-		if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+		if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
 			sctp_chunk_free(chunk);
 		else
 			backloged = 1;
@@ -337,7 +337,7 @@
 		return 0;
 	} else {
 		if (!sctp_newsk_ready(sk)) {
-			if (!sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+			if (!sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
 				return 0;
 			sctp_chunk_free(chunk);
 		} else {
@@ -364,7 +364,7 @@
 	struct sctp_ep_common *rcvr = chunk->rcvr;
 	int ret;
 
-	ret = sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+	ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
 	if (!ret) {
 		/* Hold the assoc/ep while hanging on the backlog queue.
 		 * This way, we know structures we need will not disappear
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2119,13 +2119,13 @@
 	struct tipc_msg *hdr = buf_msg(skb);
 
 	if (unlikely(msg_in_group(hdr)))
-		return sk->sk_rcvbuf;
+		return READ_ONCE(sk->sk_rcvbuf);
 
 	if (unlikely(!msg_connected(hdr)))
-		return sk->sk_rcvbuf << msg_importance(hdr);
+		return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
 
 	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
-		return sk->sk_rcvbuf;
+		return READ_ONCE(sk->sk_rcvbuf);
 
 	return FLOWCTL_MSG_LIM;
 }
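A note on the unconnected case above: the limit scales with message
importance, so with the four importance levels (0 through 3) defined in
include/uapi/linux/tipc.h the ceiling spans 1x to 8x of sk_rcvbuf.
Illustrative arithmetic only, with a hypothetical buffer size:

	/* Illustration only; 425984 is a hypothetical sk_rcvbuf value. */
	u32 rcvbuf = 425984;
	u32 lo_lim = rcvbuf << TIPC_LOW_IMPORTANCE;		/* << 0:  425984 */
	u32 hi_lim = rcvbuf << TIPC_CRITICAL_IMPORTANCE;	/* << 3: 3407872 */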
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -55,7 +55,7 @@
 	if (!sock_owned_by_user(sk)) {
 		queued = x25_process_rx_frame(sk, skb);
 	} else {
-		queued = !sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+		queued = !sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
 	}
 	bh_unlock_sock(sk);
 	sock_put(sk);