Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: implement lockless SO_PRIORITY

This is a follow-up to commit 8bf43be799d4 ("net: annotate data-races
around sk->sk_priority").

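sk->sk_priority can be read and written without holding the socket lock.
The SO_PRIORITY setsockopt path and sock_set_priority() therefore no longer
take the lock, and the remaining plain reads of the field are converted to
READ_ONCE().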

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Eric Dumazet and committed by David S. Miller.
10bbf165 06bc3668

+36 -35
+1 -1
drivers/net/ppp/pppoe.c
··· 877 877 878 878 skb->dev = dev; 879 879 880 - skb->priority = sk->sk_priority; 880 + skb->priority = READ_ONCE(sk->sk_priority); 881 881 skb->protocol = cpu_to_be16(ETH_P_PPP_SES); 882 882 883 883 ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr));
+1 -1
include/net/bluetooth/bluetooth.h
··· 541 541 return ERR_PTR(-EFAULT); 542 542 } 543 543 544 - skb->priority = sk->sk_priority; 544 + skb->priority = READ_ONCE(sk->sk_priority); 545 545 546 546 return skb; 547 547 }
+1 -1
net/appletalk/aarp.c
··· 664 664 665 665 sendit: 666 666 if (skb->sk) 667 - skb->priority = skb->sk->sk_priority; 667 + skb->priority = READ_ONCE(skb->sk->sk_priority); 668 668 if (dev_queue_xmit(skb)) 669 669 goto drop; 670 670 sent:
+1 -1
net/ax25/af_ax25.c
··· 939 939 sock_init_data(NULL, sk); 940 940 941 941 sk->sk_type = osk->sk_type; 942 - sk->sk_priority = osk->sk_priority; 942 + sk->sk_priority = READ_ONCE(osk->sk_priority); 943 943 sk->sk_protocol = osk->sk_protocol; 944 944 sk->sk_rcvbuf = osk->sk_rcvbuf; 945 945 sk->sk_sndbuf = osk->sk_sndbuf;
+1 -1
net/bluetooth/l2cap_sock.c
··· 1615 1615 return ERR_PTR(-ENOTCONN); 1616 1616 } 1617 1617 1618 - skb->priority = sk->sk_priority; 1618 + skb->priority = READ_ONCE(sk->sk_priority); 1619 1619 1620 1620 bt_cb(skb)->l2cap.chan = chan; 1621 1621
+1 -1
net/can/j1939/socket.c
··· 884 884 skcb = j1939_skb_to_cb(skb); 885 885 memset(skcb, 0, sizeof(*skcb)); 886 886 skcb->addr = jsk->addr; 887 - skcb->priority = j1939_prio(sk->sk_priority); 887 + skcb->priority = j1939_prio(READ_ONCE(sk->sk_priority)); 888 888 889 889 if (msg->msg_name) { 890 890 struct sockaddr_can *addr = msg->msg_name;
+1 -1
net/can/raw.c
··· 881 881 } 882 882 883 883 skb->dev = dev; 884 - skb->priority = sk->sk_priority; 884 + skb->priority = READ_ONCE(sk->sk_priority); 885 885 skb->mark = READ_ONCE(sk->sk_mark); 886 886 skb->tstamp = sockc.transmit_time; 887 887
+12 -11
net/core/sock.c
··· 806 806 807 807 void sock_set_priority(struct sock *sk, u32 priority) 808 808 { 809 - lock_sock(sk); 810 809 WRITE_ONCE(sk->sk_priority, priority); 811 - release_sock(sk); 812 810 } 813 811 EXPORT_SYMBOL(sock_set_priority); 814 812 ··· 1116 1118 1117 1119 valbool = val ? 1 : 0; 1118 1120 1121 + /* handle options which do not require locking the socket. */ 1122 + switch (optname) { 1123 + case SO_PRIORITY: 1124 + if ((val >= 0 && val <= 6) || 1125 + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || 1126 + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { 1127 + sock_set_priority(sk, val); 1128 + return 0; 1129 + } 1130 + return -EPERM; 1131 + } 1132 + 1119 1133 sockopt_lock_sock(sk); 1120 1134 1121 1135 switch (optname) { ··· 1221 1211 1222 1212 case SO_NO_CHECK: 1223 1213 sk->sk_no_check_tx = valbool; 1224 - break; 1225 - 1226 - case SO_PRIORITY: 1227 - if ((val >= 0 && val <= 6) || 1228 - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || 1229 - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 1230 - WRITE_ONCE(sk->sk_priority, val); 1231 - else 1232 - ret = -EPERM; 1233 1214 break; 1234 1215 1235 1216 case SO_LINGER:
+1 -1
net/dccp/ipv6.c
··· 239 239 if (!opt) 240 240 opt = rcu_dereference(np->opt); 241 241 err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt, 242 - np->tclass, sk->sk_priority); 242 + np->tclass, READ_ONCE(sk->sk_priority)); 243 243 rcu_read_unlock(); 244 244 err = net_xmit_eval(err); 245 245 }
+1 -1
net/ipv4/inet_diag.c
··· 165 165 * For cgroup2 classid is always zero. 166 166 */ 167 167 if (!classid) 168 - classid = sk->sk_priority; 168 + classid = READ_ONCE(sk->sk_priority); 169 169 170 170 if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) 171 171 goto errout;
+1 -1
net/ipv4/ip_output.c
··· 1449 1449 ip_options_build(skb, opt, cork->addr, rt); 1450 1450 } 1451 1451 1452 - skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority; 1452 + skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); 1453 1453 skb->mark = cork->mark; 1454 1454 skb->tstamp = cork->transmit_time; 1455 1455 /*
+1 -1
net/ipv4/tcp_ipv4.c
··· 828 828 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? 829 829 inet_twsk(sk)->tw_mark : sk->sk_mark; 830 830 ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? 831 - inet_twsk(sk)->tw_priority : sk->sk_priority; 831 + inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); 832 832 transmit_time = tcp_transmit_time(sk); 833 833 xfrm_sk_clone_policy(ctl_sk, sk); 834 834 txhash = (sk->sk_state == TCP_TIME_WAIT) ?
+1 -1
net/ipv4/tcp_minisocks.c
··· 292 292 293 293 tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); 294 294 tw->tw_mark = sk->sk_mark; 295 - tw->tw_priority = sk->sk_priority; 295 + tw->tw_priority = READ_ONCE(sk->sk_priority); 296 296 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; 297 297 tcptw->tw_rcv_nxt = tp->rcv_nxt; 298 298 tcptw->tw_snd_nxt = tp->snd_nxt;
+1 -1
net/ipv6/inet6_connection_sock.c
··· 133 133 fl6.daddr = sk->sk_v6_daddr; 134 134 135 135 res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), 136 - np->tclass, sk->sk_priority); 136 + np->tclass, READ_ONCE(sk->sk_priority)); 137 137 rcu_read_unlock(); 138 138 return res; 139 139 }
+1 -1
net/ipv6/ip6_output.c
··· 1984 1984 hdr->saddr = fl6->saddr; 1985 1985 hdr->daddr = *final_dst; 1986 1986 1987 - skb->priority = sk->sk_priority; 1987 + skb->priority = READ_ONCE(sk->sk_priority); 1988 1988 skb->mark = cork->base.mark; 1989 1989 skb->tstamp = cork->base.transmit_time; 1990 1990
+2 -2
net/ipv6/tcp_ipv6.c
··· 565 565 if (!opt) 566 566 opt = rcu_dereference(np->opt); 567 567 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), 568 - opt, tclass, sk->sk_priority); 568 + opt, tclass, READ_ONCE(sk->sk_priority)); 569 569 rcu_read_unlock(); 570 570 err = net_xmit_eval(err); 571 571 } ··· 1058 1058 trace_tcp_send_reset(sk, skb); 1059 1059 if (inet6_test_bit(REPFLOW, sk)) 1060 1060 label = ip6_flowlabel(ipv6h); 1061 - priority = sk->sk_priority; 1061 + priority = READ_ONCE(sk->sk_priority); 1062 1062 txhash = sk->sk_txhash; 1063 1063 } 1064 1064 if (sk->sk_state == TCP_TIME_WAIT) {
+1 -1
net/mptcp/sockopt.c
··· 89 89 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 90 90 break; 91 91 case SO_PRIORITY: 92 - ssk->sk_priority = val; 92 + WRITE_ONCE(ssk->sk_priority, val); 93 93 break; 94 94 case SO_SNDBUF: 95 95 case SO_SNDBUFFORCE:
+1 -1
net/netrom/af_netrom.c
··· 487 487 sock_init_data(NULL, sk); 488 488 489 489 sk->sk_type = osk->sk_type; 490 - sk->sk_priority = osk->sk_priority; 490 + sk->sk_priority = READ_ONCE(osk->sk_priority); 491 491 sk->sk_protocol = osk->sk_protocol; 492 492 sk->sk_rcvbuf = osk->sk_rcvbuf; 493 493 sk->sk_sndbuf = osk->sk_sndbuf;
+1 -1
net/rose/af_rose.c
··· 583 583 #endif 584 584 585 585 sk->sk_type = osk->sk_type; 586 - sk->sk_priority = osk->sk_priority; 586 + sk->sk_priority = READ_ONCE(osk->sk_priority); 587 587 sk->sk_protocol = osk->sk_protocol; 588 588 sk->sk_rcvbuf = osk->sk_rcvbuf; 589 589 sk->sk_sndbuf = osk->sk_sndbuf;
+1 -1
net/sched/em_meta.c
··· 546 546 *err = -1; 547 547 return; 548 548 } 549 - dst->value = sk->sk_priority; 549 + dst->value = READ_ONCE(sk->sk_priority); 550 550 } 551 551 552 552 META_COLLECTOR(int_sk_rcvlowat)
+1 -1
net/sctp/ipv6.c
··· 247 247 rcu_read_lock(); 248 248 res = ip6_xmit(sk, skb, fl6, sk->sk_mark, 249 249 rcu_dereference(np->opt), 250 - tclass, sk->sk_priority); 250 + tclass, READ_ONCE(sk->sk_priority)); 251 251 rcu_read_unlock(); 252 252 return res; 253 253 }
+1 -1
net/smc/af_smc.c
··· 493 493 nsk->sk_sndtimeo = osk->sk_sndtimeo; 494 494 nsk->sk_rcvtimeo = osk->sk_rcvtimeo; 495 495 nsk->sk_mark = READ_ONCE(osk->sk_mark); 496 - nsk->sk_priority = osk->sk_priority; 496 + nsk->sk_priority = READ_ONCE(osk->sk_priority); 497 497 nsk->sk_rcvlowat = osk->sk_rcvlowat; 498 498 nsk->sk_bound_dev_if = osk->sk_bound_dev_if; 499 499 nsk->sk_err = osk->sk_err;
+1 -1
net/x25/af_x25.c
··· 598 598 x25 = x25_sk(sk); 599 599 600 600 sk->sk_type = osk->sk_type; 601 - sk->sk_priority = osk->sk_priority; 601 + sk->sk_priority = READ_ONCE(osk->sk_priority); 602 602 sk->sk_protocol = osk->sk_protocol; 603 603 sk->sk_rcvbuf = osk->sk_rcvbuf; 604 604 sk->sk_sndbuf = osk->sk_sndbuf;
+1 -1
net/xdp/xsk.c
··· 684 684 } 685 685 686 686 skb->dev = dev; 687 - skb->priority = xs->sk.sk_priority; 687 + skb->priority = READ_ONCE(xs->sk.sk_priority); 688 688 skb->mark = READ_ONCE(xs->sk.sk_mark); 689 689 skb->destructor = xsk_destruct_skb; 690 690 xsk_set_destructor_arg(skb);