Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/rxe: Remove the direct link to net_device

A similar patch for siw is at the link:
https://git.kernel.org/rdma/rdma/c/16b87037b48889

This problem also occurred in RXE. The following analyzes this problem.
In the following Call Traces:
"
BUG: KASAN: slab-use-after-free in dev_get_flags+0x188/0x1d0 net/core/dev.c:8782
Read of size 4 at addr ffff8880554640b0 by task kworker/1:4/5295

CPU: 1 UID: 0 PID: 5295 Comm: kworker/1:4 Not tainted
6.12.0-rc3-syzkaller-00399-g9197b73fd7bb #0
Hardware name: Google Compute Engine/Google Compute Engine,
BIOS Google 09/13/2024
Workqueue: infiniband ib_cache_event_task
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
print_address_description mm/kasan/report.c:377 [inline]
print_report+0x169/0x550 mm/kasan/report.c:488
kasan_report+0x143/0x180 mm/kasan/report.c:601
dev_get_flags+0x188/0x1d0 net/core/dev.c:8782
rxe_query_port+0x12d/0x260 drivers/infiniband/sw/rxe/rxe_verbs.c:60
__ib_query_port drivers/infiniband/core/device.c:2111 [inline]
ib_query_port+0x168/0x7d0 drivers/infiniband/core/device.c:2143
ib_cache_update+0x1a9/0xb80 drivers/infiniband/core/cache.c:1494
ib_cache_event_task+0xf3/0x1e0 drivers/infiniband/core/cache.c:1568
process_one_work kernel/workqueue.c:3229 [inline]
process_scheduled_works+0xa65/0x1850 kernel/workqueue.c:3310
worker_thread+0x870/0xd30 kernel/workqueue.c:3391
kthread+0x2f2/0x390 kernel/kthread.c:389
ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
</TASK>
"

1). In the link [1],

"
infiniband syz2: set down
"

This means that on 839.350575, the event ib_cache_event_task was sent and
queued in ib_wq.

2). In the link [1],

"
team0 (unregistering): Port device team_slave_0 removed
"

It indicates that the net device was freed before 843.251853.

3). In the link [1],

"
BUG: KASAN: slab-use-after-free in dev_get_flags+0x188/0x1d0
"

This means that on 850.559070, this slab-use-after-free problem occurred.

In all, on 839.350575, the event ib_cache_event_task was sent and queued
in ib_wq,

before 843.251853, the net device was freed.

on 850.559070, this event was executed, and the already-freed net device
was accessed. Thus, the above call trace occurred.

[1] https://syzkaller.appspot.com/x/log.txt?x=12e7025f980000

Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Link: https://patch.msgid.link/20241220222325.2487767-1-yanjun.zhu@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>

authored by

Zhu Yanjun and committed by
Leon Romanovsky
2ac54150 e3debdd4

+90 -19
+19 -4
drivers/infiniband/sw/rxe/rxe.c
··· 40 40 /* initialize rxe device parameters */ 41 41 static void rxe_init_device_param(struct rxe_dev *rxe) 42 42 { 43 + struct net_device *ndev; 44 + 43 45 rxe->max_inline_data = RXE_MAX_INLINE_DATA; 44 46 45 47 rxe->attr.vendor_id = RXE_VENDOR_ID; ··· 73 71 rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; 74 72 rxe->attr.max_pkeys = RXE_MAX_PKEYS; 75 73 rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; 74 + 75 + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); 76 + if (!ndev) 77 + return; 78 + 76 79 addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, 77 - rxe->ndev->dev_addr); 80 + ndev->dev_addr); 81 + 82 + dev_put(ndev); 78 83 79 84 rxe->max_ucontext = RXE_MAX_UCONTEXT; 80 85 } ··· 118 109 static void rxe_init_ports(struct rxe_dev *rxe) 119 110 { 120 111 struct rxe_port *port = &rxe->port; 112 + struct net_device *ndev; 121 113 122 114 rxe_init_port_param(port); 115 + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); 116 + if (!ndev) 117 + return; 123 118 addrconf_addr_eui48((unsigned char *)&port->port_guid, 124 - rxe->ndev->dev_addr); 119 + ndev->dev_addr); 120 + dev_put(ndev); 125 121 spin_lock_init(&port->port_lock); 126 122 } 127 123 ··· 181 167 /* called by ifc layer to create new rxe device. 182 168 * The caller should allocate memory for rxe by calling ib_alloc_device. 183 169 */ 184 - int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) 170 + int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name, 171 + struct net_device *ndev) 185 172 { 186 173 rxe_init(rxe); 187 174 rxe_set_mtu(rxe, mtu); 188 175 189 - return rxe_register_device(rxe, ibdev_name); 176 + return rxe_register_device(rxe, ibdev_name, ndev); 190 177 } 191 178 192 179 static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
+2 -1
drivers/infiniband/sw/rxe/rxe.h
··· 139 139 140 140 void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); 141 141 142 - int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name); 142 + int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name, 143 + struct net_device *ndev); 143 144 144 145 void rxe_rcv(struct sk_buff *skb); 145 146
+20 -2
drivers/infiniband/sw/rxe/rxe_mcast.c
··· 31 31 static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) 32 32 { 33 33 unsigned char ll_addr[ETH_ALEN]; 34 + struct net_device *ndev; 35 + int ret; 36 + 37 + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); 38 + if (!ndev) 39 + return -ENODEV; 34 40 35 41 ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); 36 42 37 - return dev_mc_add(rxe->ndev, ll_addr); 43 + ret = dev_mc_add(ndev, ll_addr); 44 + dev_put(ndev); 45 + 46 + return ret; 38 47 } 39 48 40 49 /** ··· 56 47 static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) 57 48 { 58 49 unsigned char ll_addr[ETH_ALEN]; 50 + struct net_device *ndev; 51 + int ret; 52 + 53 + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); 54 + if (!ndev) 55 + return -ENODEV; 59 56 60 57 ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); 61 58 62 - return dev_mc_del(rxe->ndev, ll_addr); 59 + ret = dev_mc_del(ndev, ll_addr); 60 + dev_put(ndev); 61 + 62 + return ret; 63 63 } 64 64 65 65 /**
+20 -4
drivers/infiniband/sw/rxe/rxe_net.c
··· 524 524 */ 525 525 const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) 526 526 { 527 - return rxe->ndev->name; 527 + struct net_device *ndev; 528 + char *ndev_name; 529 + 530 + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); 531 + if (!ndev) 532 + return NULL; 533 + ndev_name = ndev->name; 534 + dev_put(ndev); 535 + 536 + return ndev_name; 528 537 } 529 538 530 539 int rxe_net_add(const char *ibdev_name, struct net_device *ndev) ··· 545 536 if (!rxe) 546 537 return -ENOMEM; 547 538 548 - rxe->ndev = ndev; 549 539 ib_mark_name_assigned_by_user(&rxe->ib_dev); 550 540 551 - err = rxe_add(rxe, ndev->mtu, ibdev_name); 541 + err = rxe_add(rxe, ndev->mtu, ibdev_name, ndev); 552 542 if (err) { 553 543 ib_dealloc_device(&rxe->ib_dev); 554 544 return err; ··· 595 587 596 588 void rxe_set_port_state(struct rxe_dev *rxe) 597 589 { 598 - if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev)) 590 + struct net_device *ndev; 591 + 592 + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); 593 + if (!ndev) 594 + return; 595 + 596 + if (netif_running(ndev) && netif_carrier_ok(ndev)) 599 597 rxe_port_up(rxe); 600 598 else 601 599 rxe_port_down(rxe); 600 + 601 + dev_put(ndev); 602 602 } 603 603 604 604 static int rxe_notify(struct notifier_block *not_blk,
+21 -5
drivers/infiniband/sw/rxe/rxe_verbs.c
··· 41 41 u32 port_num, struct ib_port_attr *attr) 42 42 { 43 43 struct rxe_dev *rxe = to_rdev(ibdev); 44 + struct net_device *ndev; 44 45 int err, ret; 45 46 46 47 if (port_num != 1) { 47 48 err = -EINVAL; 48 49 rxe_dbg_dev(rxe, "bad port_num = %d\n", port_num); 50 + goto err_out; 51 + } 52 + 53 + ndev = rxe_ib_device_get_netdev(ibdev); 54 + if (!ndev) { 55 + err = -ENODEV; 49 56 goto err_out; 50 57 } 51 58 ··· 64 57 65 58 if (attr->state == IB_PORT_ACTIVE) 66 59 attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; 67 - else if (dev_get_flags(rxe->ndev) & IFF_UP) 60 + else if (dev_get_flags(ndev) & IFF_UP) 68 61 attr->phys_state = IB_PORT_PHYS_STATE_POLLING; 69 62 else 70 63 attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; 71 64 72 65 mutex_unlock(&rxe->usdev_lock); 73 66 67 + dev_put(ndev); 74 68 return ret; 75 69 76 70 err_out: ··· 1433 1425 static int rxe_enable_driver(struct ib_device *ib_dev) 1434 1426 { 1435 1427 struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev); 1428 + struct net_device *ndev; 1429 + 1430 + ndev = rxe_ib_device_get_netdev(ib_dev); 1431 + if (!ndev) 1432 + return -ENODEV; 1436 1433 1437 1434 rxe_set_port_state(rxe); 1438 - dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev)); 1435 + dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(ndev)); 1436 + 1437 + dev_put(ndev); 1439 1438 return 0; 1440 1439 } 1441 1440 ··· 1510 1495 INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw), 1511 1496 }; 1512 1497 1513 - int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) 1498 + int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name, 1499 + struct net_device *ndev) 1514 1500 { 1515 1501 int err; 1516 1502 struct ib_device *dev = &rxe->ib_dev; ··· 1523 1507 dev->num_comp_vectors = num_possible_cpus(); 1524 1508 dev->local_dma_lkey = 0; 1525 1509 addrconf_addr_eui48((unsigned char *)&dev->node_guid, 1526 - rxe->ndev->dev_addr); 1510 + ndev->dev_addr); 1527 1511 1528 1512 dev->uverbs_cmd_mask |= 
BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | 1529 1513 BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); 1530 1514 1531 1515 ib_set_device_ops(dev, &rxe_dev_ops); 1532 - err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); 1516 + err = ib_device_set_netdev(&rxe->ib_dev, ndev, 1); 1533 1517 if (err) 1534 1518 return err; 1535 1519
+8 -3
drivers/infiniband/sw/rxe/rxe_verbs.h
··· 370 370 u32 qp_gsi_index; 371 371 }; 372 372 373 + #define RXE_PORT 1 373 374 struct rxe_dev { 374 375 struct ib_device ib_dev; 375 376 struct ib_device_attr attr; 376 377 int max_ucontext; 377 378 int max_inline_data; 378 379 struct mutex usdev_lock; 379 - 380 - struct net_device *ndev; 381 380 382 381 struct rxe_pool uc_pool; 383 382 struct rxe_pool pd_pool; ··· 404 405 struct rxe_port port; 405 406 struct crypto_shash *tfm; 406 407 }; 408 + 409 + static inline struct net_device *rxe_ib_device_get_netdev(struct ib_device *dev) 410 + { 411 + return ib_device_get_netdev(dev, RXE_PORT); 412 + } 407 413 408 414 static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index) 409 415 { ··· 475 471 return to_rpd(mw->ibmw.pd); 476 472 } 477 473 478 - int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); 474 + int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name, 475 + struct net_device *ndev); 479 476 480 477 #endif /* RXE_VERBS_H */