Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/ucma: Protect mc during concurrent multicast leaves

Partially revert the commit mentioned in the Fixes line to make sure that
allocation and erasing of the multicast struct are performed under a lock.

BUG: KASAN: use-after-free in ucma_cleanup_multicast drivers/infiniband/core/ucma.c:491 [inline]
BUG: KASAN: use-after-free in ucma_destroy_private_ctx+0x914/0xb70 drivers/infiniband/core/ucma.c:579
Read of size 8 at addr ffff88801bb74b00 by task syz-executor.1/25529
CPU: 0 PID: 25529 Comm: syz-executor.1 Not tainted 5.16.0-rc7-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
print_address_description.constprop.0.cold+0x8d/0x320 mm/kasan/report.c:247
__kasan_report mm/kasan/report.c:433 [inline]
kasan_report.cold+0x83/0xdf mm/kasan/report.c:450
ucma_cleanup_multicast drivers/infiniband/core/ucma.c:491 [inline]
ucma_destroy_private_ctx+0x914/0xb70 drivers/infiniband/core/ucma.c:579
ucma_destroy_id+0x1e6/0x280 drivers/infiniband/core/ucma.c:614
ucma_write+0x25c/0x350 drivers/infiniband/core/ucma.c:1732
vfs_write+0x28e/0xae0 fs/read_write.c:588
ksys_write+0x1ee/0x250 fs/read_write.c:643
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae

Currently the xarray search can touch a concurrently freeing mc as the
xa_for_each() is not surrounded by any lock. Rather than hold the lock for
a full scan, hold it only for the affected items, which is usually an empty
list.

Fixes: 95fe51096b7a ("RDMA/ucma: Remove mc_list and rely on xarray")
Link: https://lore.kernel.org/r/1cda5fabb1081e8d16e39a48d3a4f8160cea88b8.1642491047.git.leonro@nvidia.com
Reported-by: syzbot+e3f96c43d19782dd14a7@syzkaller.appspotmail.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Authored by Leon Romanovsky, committed by Jason Gunthorpe.
Commit: 36e8169e (parent: d9e410eb)

+23 -11
drivers/infiniband/core/ucma.c
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ struct ucma_context @@
 	u64 uid;
 
 	struct list_head list;
+	struct list_head mc_list;
 	struct work_struct close_work;
 };
@@ struct ucma_multicast @@
 	u64 uid;
 	u8 join_state;
+	struct list_head list;
 	struct sockaddr_storage addr;
 };
@@ ucma_alloc_ctx @@
 	INIT_WORK(&ctx->close_work, ucma_close_id);
 	init_completion(&ctx->comp);
+	INIT_LIST_HEAD(&ctx->mc_list);
 	/* So list_del() will work if we don't do ucma_finish_ctx() */
 	INIT_LIST_HEAD(&ctx->list);
 	ctx->file = file;
@@ ucma_cleanup_multicast @@
 static void ucma_cleanup_multicast(struct ucma_context *ctx)
 {
-	struct ucma_multicast *mc;
-	unsigned long index;
+	struct ucma_multicast *mc, *tmp;
 
-	xa_for_each(&multicast_table, index, mc) {
-		if (mc->ctx != ctx)
-			continue;
+	xa_lock(&multicast_table);
+	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+		list_del(&mc->list);
 		/*
 		 * At this point mc->ctx->ref is 0 so the mc cannot leave the
 		 * lock on the reader and this is enough serialization
 		 */
-		xa_erase(&multicast_table, index);
+		__xa_erase(&multicast_table, mc->id);
 		kfree(mc);
 	}
+	xa_unlock(&multicast_table);
 }
 
 static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
@@ ucma_process_join @@
 	mc->uid = cmd->uid;
 	memcpy(&mc->addr, addr, cmd->addr_size);
 
-	if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+	xa_lock(&multicast_table);
+	if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
 		     GFP_KERNEL)) {
 		ret = -ENOMEM;
 		goto err_free_mc;
 	}
+
+	list_add_tail(&mc->list, &ctx->mc_list);
+	xa_unlock(&multicast_table);
 
 	mutex_lock(&ctx->mutex);
 	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
@@ ucma_process_join (error path) @@
 	mutex_unlock(&ctx->mutex);
 	ucma_cleanup_mc_events(mc);
 err_xa_erase:
-	xa_erase(&multicast_table, mc->id);
+	xa_lock(&multicast_table);
+	list_del(&mc->list);
+	__xa_erase(&multicast_table, mc->id);
 err_free_mc:
+	xa_unlock(&multicast_table);
 	kfree(mc);
 err_put_ctx:
 	ucma_put_ctx(ctx);
@@ ucma_leave_multicast @@
 		mc = ERR_PTR(-EINVAL);
 	else if (!refcount_inc_not_zero(&mc->ctx->ref))
 		mc = ERR_PTR(-ENXIO);
-	else
-		__xa_erase(&multicast_table, mc->id);
-	xa_unlock(&multicast_table);
 
 	if (IS_ERR(mc)) {
+		xa_unlock(&multicast_table);
 		ret = PTR_ERR(mc);
 		goto out;
 	}
+
+	list_del(&mc->list);
+	__xa_erase(&multicast_table, mc->id);
+	xa_unlock(&multicast_table);
 
 	mutex_lock(&mc->ctx->mutex);
 	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);