Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

af_key: unconditionally clone on broadcast

Attempting to avoid cloning the skb when broadcasting by inflating
the refcount with sock_hold/sock_put while under RCU lock is dangerous
and violates RCU principles. It leads to subtle race conditions when
attempting to free the SKB, as we may reference sockets that have
already been freed by the stack.

Unable to handle kernel paging request at virtual address 6b6b6b6b6b6c4b
[006b6b6b6b6b6c4b] address between user and kernel address ranges
Internal error: Oops: 96000004 [#1] PREEMPT SMP
task: fffffff78f65b380 task.stack: ffffff8049a88000
pc : sock_rfree+0x38/0x6c
lr : skb_release_head_state+0x6c/0xcc
Process repro (pid: 7117, stack limit = 0xffffff8049a88000)
Call trace:
sock_rfree+0x38/0x6c
skb_release_head_state+0x6c/0xcc
skb_release_all+0x1c/0x38
__kfree_skb+0x1c/0x30
kfree_skb+0xd0/0xf4
pfkey_broadcast+0x14c/0x18c
pfkey_sendmsg+0x1d8/0x408
sock_sendmsg+0x44/0x60
___sys_sendmsg+0x1d0/0x2a8
__sys_sendmsg+0x64/0xb4
SyS_sendmsg+0x34/0x4c
el0_svc_naked+0x34/0x38
Kernel panic - not syncing: Fatal exception

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

authored by

Sean Tranchetti and committed by
Steffen Klassert
fc2d5cfd f75a2804

+15 -25
+15 -25
net/key/af_key.c
··· 196 196 return 0; 197 197 } 198 198 199 - static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, 200 - gfp_t allocation, struct sock *sk) 199 + static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation, 200 + struct sock *sk) 201 201 { 202 202 int err = -ENOBUFS; 203 203 204 - sock_hold(sk); 205 - if (*skb2 == NULL) { 206 - if (refcount_read(&skb->users) != 1) { 207 - *skb2 = skb_clone(skb, allocation); 208 - } else { 209 - *skb2 = skb; 210 - refcount_inc(&skb->users); 211 - } 204 + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) 205 + return err; 206 + 207 + skb = skb_clone(skb, allocation); 208 + 209 + if (skb) { 210 + skb_set_owner_r(skb, sk); 211 + skb_queue_tail(&sk->sk_receive_queue, skb); 212 + sk->sk_data_ready(sk); 213 + err = 0; 212 214 } 213 - if (*skb2 != NULL) { 214 - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { 215 - skb_set_owner_r(*skb2, sk); 216 - skb_queue_tail(&sk->sk_receive_queue, *skb2); 217 - sk->sk_data_ready(sk); 218 - *skb2 = NULL; 219 - err = 0; 220 - } 221 - } 222 - sock_put(sk); 223 215 return err; 224 216 } 225 217 ··· 226 234 { 227 235 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); 228 236 struct sock *sk; 229 - struct sk_buff *skb2 = NULL; 230 237 int err = -ESRCH; 231 238 232 239 /* XXX Do we need something like netlink_overrun? I think ··· 244 253 * socket. 245 254 */ 246 255 if (pfk->promisc) 247 - pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); 256 + pfkey_broadcast_one(skb, GFP_ATOMIC, sk); 248 257 249 258 /* the exact target will be processed later */ 250 259 if (sk == one_sk) ··· 259 268 continue; 260 269 } 261 270 262 - err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); 271 + err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk); 263 272 264 273 /* Error is cleared after successful sending to at least one 265 274 * registered KM */ ··· 269 278 rcu_read_unlock(); 270 279 271 280 if (one_sk != NULL) 272 - err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); 281 + err = pfkey_broadcast_one(skb, allocation, one_sk); 273 282 274 - kfree_skb(skb2); 275 283 kfree_skb(skb); 276 284 return err; 277 285 }