Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv4: fix data races in fib_alias_hw_flags_set

fib_alias_hw_flags_set() can be used by concurrent threads,
and is only RCU protected.

We need to annotate accesses to the following fields of struct fib_alias:

offload, trap, offload_failed

Because READ_ONCE()/WRITE_ONCE() cannot be applied to bitfields,
make these fields u8.

BUG: KCSAN: data-race in fib_alias_hw_flags_set / fib_alias_hw_flags_set

read to 0xffff888134224a6a of 1 bytes by task 2013 on cpu 1:
fib_alias_hw_flags_set+0x28a/0x470 net/ipv4/fib_trie.c:1050
nsim_fib4_rt_hw_flags_set drivers/net/netdevsim/fib.c:350 [inline]
nsim_fib4_rt_add drivers/net/netdevsim/fib.c:367 [inline]
nsim_fib4_rt_insert drivers/net/netdevsim/fib.c:429 [inline]
nsim_fib4_event drivers/net/netdevsim/fib.c:461 [inline]
nsim_fib_event drivers/net/netdevsim/fib.c:881 [inline]
nsim_fib_event_work+0x1852/0x2cf0 drivers/net/netdevsim/fib.c:1477
process_one_work+0x3f6/0x960 kernel/workqueue.c:2307
process_scheduled_works kernel/workqueue.c:2370 [inline]
worker_thread+0x7df/0xa70 kernel/workqueue.c:2456
kthread+0x1bf/0x1e0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30

write to 0xffff888134224a6a of 1 bytes by task 4872 on cpu 0:
fib_alias_hw_flags_set+0x2d5/0x470 net/ipv4/fib_trie.c:1054
nsim_fib4_rt_hw_flags_set drivers/net/netdevsim/fib.c:350 [inline]
nsim_fib4_rt_add drivers/net/netdevsim/fib.c:367 [inline]
nsim_fib4_rt_insert drivers/net/netdevsim/fib.c:429 [inline]
nsim_fib4_event drivers/net/netdevsim/fib.c:461 [inline]
nsim_fib_event drivers/net/netdevsim/fib.c:881 [inline]
nsim_fib_event_work+0x1852/0x2cf0 drivers/net/netdevsim/fib.c:1477
process_one_work+0x3f6/0x960 kernel/workqueue.c:2307
process_scheduled_works kernel/workqueue.c:2370 [inline]
worker_thread+0x7df/0xa70 kernel/workqueue.c:2456
kthread+0x1bf/0x1e0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30

value changed: 0x00 -> 0x02

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 4872 Comm: kworker/0:0 Not tainted 5.17.0-rc3-syzkaller-00188-g1d41d2e82623-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: events nsim_fib_event_work

Fixes: 90b93f1b31f8 ("ipv4: Add "offload" and "trap" indications to routes")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/r/20220216173217.3792411-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
9fcf986c 430065e2

+21 -18
+3 -4
net/ipv4/fib_lookup.h
··· 16 16 u8 fa_slen; 17 17 u32 tb_id; 18 18 s16 fa_default; 19 - u8 offload:1, 20 - trap:1, 21 - offload_failed:1, 22 - unused:5; 19 + u8 offload; 20 + u8 trap; 21 + u8 offload_failed; 23 22 struct rcu_head rcu; 24 23 }; 25 24
+3 -3
net/ipv4/fib_semantics.c
··· 525 525 fri.dst_len = dst_len; 526 526 fri.tos = fa->fa_tos; 527 527 fri.type = fa->fa_type; 528 - fri.offload = fa->offload; 529 - fri.trap = fa->trap; 530 - fri.offload_failed = fa->offload_failed; 528 + fri.offload = READ_ONCE(fa->offload); 529 + fri.trap = READ_ONCE(fa->trap); 530 + fri.offload_failed = READ_ONCE(fa->offload_failed); 531 531 err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags); 532 532 if (err < 0) { 533 533 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
+13 -9
net/ipv4/fib_trie.c
··· 1047 1047 if (!fa_match) 1048 1048 goto out; 1049 1049 1050 - if (fa_match->offload == fri->offload && fa_match->trap == fri->trap && 1051 - fa_match->offload_failed == fri->offload_failed) 1050 + /* These are paired with the WRITE_ONCE() happening in this function. 1051 + * The reason is that we are only protected by RCU at this point. 1052 + */ 1053 + if (READ_ONCE(fa_match->offload) == fri->offload && 1054 + READ_ONCE(fa_match->trap) == fri->trap && 1055 + READ_ONCE(fa_match->offload_failed) == fri->offload_failed) 1052 1056 goto out; 1053 1057 1054 - fa_match->offload = fri->offload; 1055 - fa_match->trap = fri->trap; 1058 + WRITE_ONCE(fa_match->offload, fri->offload); 1059 + WRITE_ONCE(fa_match->trap, fri->trap); 1056 1060 1057 1061 /* 2 means send notifications only if offload_failed was changed. */ 1058 1062 if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 && 1059 - fa_match->offload_failed == fri->offload_failed) 1063 + READ_ONCE(fa_match->offload_failed) == fri->offload_failed) 1060 1064 goto out; 1061 1065 1062 - fa_match->offload_failed = fri->offload_failed; 1066 + WRITE_ONCE(fa_match->offload_failed, fri->offload_failed); 1063 1067 1064 1068 if (!net->ipv4.sysctl_fib_notify_on_flag_change) 1065 1069 goto out; ··· 2301 2297 fri.dst_len = KEYLENGTH - fa->fa_slen; 2302 2298 fri.tos = fa->fa_tos; 2303 2299 fri.type = fa->fa_type; 2304 - fri.offload = fa->offload; 2305 - fri.trap = fa->trap; 2306 - fri.offload_failed = fa->offload_failed; 2300 + fri.offload = READ_ONCE(fa->offload); 2301 + fri.trap = READ_ONCE(fa->trap); 2302 + fri.offload_failed = READ_ONCE(fa->offload_failed); 2307 2303 err = fib_dump_info(skb, 2308 2304 NETLINK_CB(cb->skb).portid, 2309 2305 cb->nlh->nlmsg_seq,
+2 -2
net/ipv4/route.c
··· 3395 3395 fa->fa_tos == fri.tos && 3396 3396 fa->fa_info == res.fi && 3397 3397 fa->fa_type == fri.type) { 3398 - fri.offload = fa->offload; 3399 - fri.trap = fa->trap; 3398 + fri.offload = READ_ONCE(fa->offload); 3399 + fri.trap = READ_ONCE(fa->trap); 3400 3400 break; 3401 3401 } 3402 3402 }