Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6: annotate some data-races around sk->sk_prot

IPv6 has this hack changing sk->sk_prot when an IPv6 socket
is 'converted' to an IPv4 one with the IPV6_ADDRFORM option.

This operation is only performed for TCP and UDP, knowing that
their 'struct proto' instances for the two network families are
populated in the same way, and cannot disappear while a reader
might use and dereference sk->sk_prot.

If we think about it, all reads of sk->sk_prot while
either the socket lock or RTNL is not acquired should be using READ_ONCE().

Also note that other layers like MPTCP, XFRM and CHELSIO_TLS
write over sk->sk_prot as well.

BUG: KCSAN: data-race in inet6_recvmsg / ipv6_setsockopt

write to 0xffff8881386f7aa8 of 8 bytes by task 26932 on cpu 0:
do_ipv6_setsockopt net/ipv6/ipv6_sockglue.c:492 [inline]
ipv6_setsockopt+0x3758/0x3910 net/ipv6/ipv6_sockglue.c:1019
udpv6_setsockopt+0x85/0x90 net/ipv6/udp.c:1649
sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3489
__sys_setsockopt+0x209/0x2a0 net/socket.c:2180
__do_sys_setsockopt net/socket.c:2191 [inline]
__se_sys_setsockopt net/socket.c:2188 [inline]
__x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff8881386f7aa8 of 8 bytes by task 26911 on cpu 1:
inet6_recvmsg+0x7a/0x210 net/ipv6/af_inet6.c:659
____sys_recvmsg+0x16c/0x320
___sys_recvmsg net/socket.c:2674 [inline]
do_recvmmsg+0x3f5/0xae0 net/socket.c:2768
__sys_recvmmsg net/socket.c:2847 [inline]
__do_sys_recvmmsg net/socket.c:2870 [inline]
__se_sys_recvmmsg net/socket.c:2863 [inline]
__x64_sys_recvmmsg+0xde/0x160 net/socket.c:2863
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0xffffffff85e0e980 -> 0xffffffff85e01580

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 26911 Comm: syz-executor.3 Not tainted 5.17.0-rc2-syzkaller-00316-g0457e5153e0e-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Eric Dumazet and committed by David S. Miller
086d4905 7ea0c16a

+22 -8
+18 -6
net/ipv6/af_inet6.c
··· 441 441 { 442 442 struct sock *sk = sock->sk; 443 443 u32 flags = BIND_WITH_LOCK; 444 + const struct proto *prot; 444 445 int err = 0; 445 446 447 + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ 448 + prot = READ_ONCE(sk->sk_prot); 446 449 /* If the socket has its own bind function then use it. */ 447 - if (sk->sk_prot->bind) 448 - return sk->sk_prot->bind(sk, uaddr, addr_len); 450 + if (prot->bind) 451 + return prot->bind(sk, uaddr, addr_len); 449 452 450 453 if (addr_len < SIN6_LEN_RFC2133) 451 454 return -EINVAL; ··· 558 555 void __user *argp = (void __user *)arg; 559 556 struct sock *sk = sock->sk; 560 557 struct net *net = sock_net(sk); 558 + const struct proto *prot; 561 559 562 560 switch (cmd) { 563 561 case SIOCADDRT: ··· 576 572 case SIOCSIFDSTADDR: 577 573 return addrconf_set_dstaddr(net, argp); 578 574 default: 579 - if (!sk->sk_prot->ioctl) 575 + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ 576 + prot = READ_ONCE(sk->sk_prot); 577 + if (!prot->ioctl) 580 578 return -ENOIOCTLCMD; 581 - return sk->sk_prot->ioctl(sk, cmd, arg); 579 + return prot->ioctl(sk, cmd, arg); 582 580 } 583 581 /*NOTREACHED*/ 584 582 return 0; ··· 642 636 int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 643 637 { 644 638 struct sock *sk = sock->sk; 639 + const struct proto *prot; 645 640 646 641 if (unlikely(inet_send_prepare(sk))) 647 642 return -EAGAIN; 648 643 649 - return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, 644 + /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ 645 + prot = READ_ONCE(sk->sk_prot); 646 + return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, 650 647 sk, msg, size); 651 648 } 652 649 ··· 659 650 int flags) 660 651 { 661 652 struct sock *sk = sock->sk; 653 + const struct proto *prot; 662 654 int addr_len = 0; 663 655 int err; 664 656 665 657 if (likely(!(flags & MSG_ERRQUEUE))) 666 658 sock_rps_record_flow(sk); 667 659 668 - err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, 660 + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ 661 + prot = READ_ONCE(sk->sk_prot); 662 + err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, 669 663 sk, msg, size, flags & MSG_DONTWAIT, 670 664 flags & ~MSG_DONTWAIT, &addr_len); 671 665 if (err >= 0)
+4 -2
net/ipv6/ipv6_sockglue.c
··· 475 475 sock_prot_inuse_add(net, sk->sk_prot, -1); 476 476 sock_prot_inuse_add(net, &tcp_prot, 1); 477 477 478 - sk->sk_prot = &tcp_prot; 478 + /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ 479 + WRITE_ONCE(sk->sk_prot, &tcp_prot); 479 480 icsk->icsk_af_ops = &ipv4_specific; 480 481 sk->sk_socket->ops = &inet_stream_ops; 481 482 sk->sk_family = PF_INET; ··· 490 489 sock_prot_inuse_add(net, sk->sk_prot, -1); 491 490 sock_prot_inuse_add(net, prot, 1); 492 491 493 - sk->sk_prot = prot; 492 + /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ 493 + WRITE_ONCE(sk->sk_prot, prot); 494 494 sk->sk_socket->ops = &inet_dgram_ops; 495 495 sk->sk_family = PF_INET; 496 496 }