Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sock: support SO_PRIORITY cmsg

The Linux socket API currently allows setting SO_PRIORITY at the
socket level, applying a uniform priority to all packets sent through
that socket. The one exception is IP_TOS: when it is passed as
ancillary data, the priority is derived from the TOS value while the
control message is handled, as implemented in commit f02db315b8d8
("ipv4: IP_TOS and IP_TTL can be specified as ancillary data").
That priority is a computed value, however, and prior to this patch
there is no mechanism to set a custom priority via control messages.

With this patch, if SO_PRIORITY is specified as ancillary data, the
packet is sent with the priority value carried in sockc->priority,
overriding the socket-level value set via the traditional
setsockopt() method. This is analogous to the existing support for
SO_MARK, as implemented in commit c6af0c227a22 ("ip: support SO_MARK
cmsg").

If both SO_PRIORITY and IP_TOS are passed as cmsgs, the one that
appears last in the cmsg list takes precedence.

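As a side effect of this patch, raw_send_hdrinc now honours IP_TOS
passed as a cmsg: the IP_TOS handler stores the derived priority in
sockc.priority, which raw_send_hdrinc now uses for skb->priority.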

Reviewed-by: Willem de Bruijn <willemb@google.com>
Suggested-by: Ferenc Fejes <fejes@inf.elte.hu>
Signed-off-by: Anna Emese Nyiri <annaemesenyiri@gmail.com>
Link: https://patch.msgid.link/20241213084457.45120-3-annaemesenyiri@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Anna Emese Nyiri and committed by
Jakub Kicinski
a32f3e9d 77ec16be

+24 -11
+1 -1
include/net/inet_sock.h
··· 172 172 u8 tx_flags; 173 173 __u8 ttl; 174 174 __s16 tos; 175 - char priority; 175 + u32 priority; 176 176 __u16 gso_size; 177 177 u32 ts_opt_id; 178 178 u64 transmit_time;
+1 -1
include/net/ip.h
··· 81 81 __u8 protocol; 82 82 __u8 ttl; 83 83 __s16 tos; 84 - char priority; 85 84 __u16 gso_size; 86 85 }; 87 86 ··· 95 96 ipcm_init(ipcm); 96 97 97 98 ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); 99 + ipcm->sockc.priority = READ_ONCE(inet->sk.sk_priority); 98 100 ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags); 99 101 ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); 100 102 ipcm->addr = inet->inet_saddr;
+3 -1
include/net/sock.h
··· 1814 1814 u32 mark; 1815 1815 u32 tsflags; 1816 1816 u32 ts_opt_id; 1817 + u32 priority; 1817 1818 }; 1818 1819 1819 1820 static inline void sockcm_init(struct sockcm_cookie *sockc, 1820 1821 const struct sock *sk) 1821 1822 { 1822 1823 *sockc = (struct sockcm_cookie) { 1823 - .tsflags = READ_ONCE(sk->sk_tsflags) 1824 + .tsflags = READ_ONCE(sk->sk_tsflags), 1825 + .priority = READ_ONCE(sk->sk_priority), 1824 1826 }; 1825 1827 } 1826 1828
+1 -1
net/can/raw.c
··· 962 962 } 963 963 964 964 skb->dev = dev; 965 - skb->priority = READ_ONCE(sk->sk_priority); 965 + skb->priority = sockc.priority; 966 966 skb->mark = READ_ONCE(sk->sk_mark); 967 967 skb->tstamp = sockc.transmit_time; 968 968
+7
net/core/sock.c
··· 2947 2947 case SCM_RIGHTS: 2948 2948 case SCM_CREDENTIALS: 2949 2949 break; 2950 + case SO_PRIORITY: 2951 + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2952 + return -EINVAL; 2953 + if (!sk_set_prio_allowed(sk, *(u32 *)CMSG_DATA(cmsg))) 2954 + return -EPERM; 2955 + sockc->priority = *(u32 *)CMSG_DATA(cmsg); 2956 + break; 2950 2957 default: 2951 2958 return -EINVAL; 2952 2959 }
+2 -2
net/ipv4/ip_output.c
··· 1333 1333 cork->ttl = ipc->ttl; 1334 1334 cork->tos = ipc->tos; 1335 1335 cork->mark = ipc->sockc.mark; 1336 - cork->priority = ipc->priority; 1336 + cork->priority = ipc->sockc.priority; 1337 1337 cork->transmit_time = ipc->sockc.transmit_time; 1338 1338 cork->tx_flags = 0; 1339 1339 sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags); ··· 1470 1470 ip_options_build(skb, opt, cork->addr, rt); 1471 1471 } 1472 1472 1473 - skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); 1473 + skb->priority = cork->priority; 1474 1474 skb->mark = cork->mark; 1475 1475 if (sk_is_tcp(sk)) 1476 1476 skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
+1 -1
net/ipv4/ip_sockglue.c
··· 315 315 if (val < 0 || val > 255) 316 316 return -EINVAL; 317 317 ipc->tos = val; 318 - ipc->priority = rt_tos2priority(ipc->tos); 318 + ipc->sockc.priority = rt_tos2priority(ipc->tos); 319 319 break; 320 320 case IP_PROTOCOL: 321 321 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+1 -1
net/ipv4/raw.c
··· 358 358 skb_reserve(skb, hlen); 359 359 360 360 skb->protocol = htons(ETH_P_IP); 361 - skb->priority = READ_ONCE(sk->sk_priority); 361 + skb->priority = sockc->priority; 362 362 skb->mark = sockc->mark; 363 363 skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); 364 364 skb_dst_set(skb, &rt->dst);
+2 -1
net/ipv6/ip6_output.c
··· 1401 1401 cork->base.gso_size = ipc6->gso_size; 1402 1402 cork->base.tx_flags = 0; 1403 1403 cork->base.mark = ipc6->sockc.mark; 1404 + cork->base.priority = ipc6->sockc.priority; 1404 1405 sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags); 1405 1406 if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { 1406 1407 cork->base.flags |= IPCORK_TS_OPT_ID; ··· 1943 1942 hdr->saddr = fl6->saddr; 1944 1943 hdr->daddr = *final_dst; 1945 1944 1946 - skb->priority = READ_ONCE(sk->sk_priority); 1945 + skb->priority = cork->base.priority; 1947 1946 skb->mark = cork->base.mark; 1948 1947 if (sk_is_tcp(sk)) 1949 1948 skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
+1
net/ipv6/ping.c
··· 119 119 return -EINVAL; 120 120 121 121 ipcm6_init_sk(&ipc6, sk); 122 + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); 122 123 ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); 123 124 ipc6.sockc.mark = READ_ONCE(sk->sk_mark); 124 125
+2 -1
net/ipv6/raw.c
··· 619 619 skb_reserve(skb, hlen); 620 620 621 621 skb->protocol = htons(ETH_P_IPV6); 622 - skb->priority = READ_ONCE(sk->sk_priority); 622 + skb->priority = sockc->priority; 623 623 skb->mark = sockc->mark; 624 624 skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); 625 625 ··· 780 780 ipcm6_init(&ipc6); 781 781 ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); 782 782 ipc6.sockc.mark = fl6.flowi6_mark; 783 + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); 783 784 784 785 if (sin6) { 785 786 if (addr_len < SIN6_LEN_RFC2133)
+1
net/ipv6/udp.c
··· 1448 1448 ipc6.gso_size = READ_ONCE(up->gso_size); 1449 1449 ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); 1450 1450 ipc6.sockc.mark = READ_ONCE(sk->sk_mark); 1451 + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); 1451 1452 1452 1453 /* destination address check */ 1453 1454 if (sin6) {
+1 -1
net/packet/af_packet.c
··· 3126 3126 3127 3127 skb->protocol = proto; 3128 3128 skb->dev = dev; 3129 - skb->priority = READ_ONCE(sk->sk_priority); 3129 + skb->priority = sockc.priority; 3130 3130 skb->mark = sockc.mark; 3131 3131 skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); 3132 3132