Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/core: lock client data with lists_rwsem

An ib_client callback that is called with lists_rwsem locked only for
read is protected from changes to the IB client lists, but not from
ib_unregister_device() freeing its client data. This is because
ib_unregister_device() removes the device from the device list with
lists_rwsem locked for write, but performs the rest of the cleanup,
including the call to remove(), without holding that lock.

Mark client data that is undergoing de-registration with a new going_down
flag in the client data context. Lock the client data list with lists_rwsem
for write, in addition to using the spinlock, so that functions calling the
callbacks need only take lists_rwsem for read, which allows the callbacks
to sleep.

Since ib_unregister_client() now marks the client data context, there is no
need for remove() to look up the context again, so the client data is passed
directly to the remove() callbacks.

Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

authored by

Haggai Eran and committed by
Doug Ledford
7c1eb45a 5aa44bb9

+82 -52
+1 -1
drivers/infiniband/core/cache.c
··· 394 394 kfree(device->cache.lmc_cache); 395 395 } 396 396 397 - static void ib_cache_cleanup_one(struct ib_device *device) 397 + static void ib_cache_cleanup_one(struct ib_device *device, void *client_data) 398 398 { 399 399 int p; 400 400
+3 -4
drivers/infiniband/core/cm.c
··· 58 58 MODULE_LICENSE("Dual BSD/GPL"); 59 59 60 60 static void cm_add_one(struct ib_device *device); 61 - static void cm_remove_one(struct ib_device *device); 61 + static void cm_remove_one(struct ib_device *device, void *client_data); 62 62 63 63 static struct ib_client cm_client = { 64 64 .name = "cm", ··· 3886 3886 kfree(cm_dev); 3887 3887 } 3888 3888 3889 - static void cm_remove_one(struct ib_device *ib_device) 3889 + static void cm_remove_one(struct ib_device *ib_device, void *client_data) 3890 3890 { 3891 - struct cm_device *cm_dev; 3891 + struct cm_device *cm_dev = client_data; 3892 3892 struct cm_port *port; 3893 3893 struct ib_port_modify port_modify = { 3894 3894 .clr_port_cap_mask = IB_PORT_CM_SUP ··· 3896 3896 unsigned long flags; 3897 3897 int i; 3898 3898 3899 - cm_dev = ib_get_client_data(ib_device, &cm_client); 3900 3899 if (!cm_dev) 3901 3900 return; 3902 3901
+3 -4
drivers/infiniband/core/cma.c
··· 94 94 EXPORT_SYMBOL(rdma_event_msg); 95 95 96 96 static void cma_add_one(struct ib_device *device); 97 - static void cma_remove_one(struct ib_device *device); 97 + static void cma_remove_one(struct ib_device *device, void *client_data); 98 98 99 99 static struct ib_client cma_client = { 100 100 .name = "cma", ··· 3554 3554 wait_for_completion(&cma_dev->comp); 3555 3555 } 3556 3556 3557 - static void cma_remove_one(struct ib_device *device) 3557 + static void cma_remove_one(struct ib_device *device, void *client_data) 3558 3558 { 3559 - struct cma_device *cma_dev; 3559 + struct cma_device *cma_dev = client_data; 3560 3560 3561 - cma_dev = ib_get_client_data(device, &cma_client); 3562 3561 if (!cma_dev) 3563 3562 return; 3564 3563
+44 -9
drivers/infiniband/core/device.c
··· 50 50 struct list_head list; 51 51 struct ib_client *client; 52 52 void * data; 53 + /* The device or client is going down. Do not call client or device 54 + * callbacks other than remove(). */ 55 + bool going_down; 53 56 }; 54 57 55 58 struct workqueue_struct *ib_wq; ··· 72 69 * to the lists must be done with a write lock. A special case is when the 73 70 * device_mutex is locked. In this case locking the lists for read access is 74 71 * not necessary as the device_mutex implies it. 72 + * 73 + * lists_rwsem also protects access to the client data list. 75 74 */ 76 75 static DEFINE_MUTEX(device_mutex); 77 76 static DECLARE_RWSEM(lists_rwsem); ··· 215 210 216 211 context->client = client; 217 212 context->data = NULL; 213 + context->going_down = false; 218 214 215 + down_write(&lists_rwsem); 219 216 spin_lock_irqsave(&device->client_data_lock, flags); 220 217 list_add(&context->list, &device->client_data_list); 221 218 spin_unlock_irqrestore(&device->client_data_lock, flags); 219 + up_write(&lists_rwsem); 222 220 223 221 return 0; 224 222 } ··· 347 339 */ 348 340 void ib_unregister_device(struct ib_device *device) 349 341 { 350 - struct ib_client *client; 351 342 struct ib_client_data *context, *tmp; 352 343 unsigned long flags; 353 344 ··· 354 347 355 348 down_write(&lists_rwsem); 356 349 list_del(&device->core_list); 357 - up_write(&lists_rwsem); 350 + spin_lock_irqsave(&device->client_data_lock, flags); 351 + list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 352 + context->going_down = true; 353 + spin_unlock_irqrestore(&device->client_data_lock, flags); 354 + downgrade_write(&lists_rwsem); 358 355 359 - list_for_each_entry_reverse(client, &client_list, list) 360 - if (client->remove) 361 - client->remove(device); 356 + list_for_each_entry_safe(context, tmp, &device->client_data_list, 357 + list) { 358 + if (context->client->remove) 359 + context->client->remove(device, context->data); 360 + } 361 + up_read(&lists_rwsem); 362 362 363 
363 mutex_unlock(&device_mutex); 364 364 365 365 ib_device_unregister_sysfs(device); 366 366 367 + down_write(&lists_rwsem); 367 368 spin_lock_irqsave(&device->client_data_lock, flags); 368 369 list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 369 370 kfree(context); 370 371 spin_unlock_irqrestore(&device->client_data_lock, flags); 372 + up_write(&lists_rwsem); 371 373 372 374 device->reg_state = IB_DEV_UNREGISTERED; 373 375 } ··· 436 420 up_write(&lists_rwsem); 437 421 438 422 list_for_each_entry(device, &device_list, core_list) { 439 - if (client->remove) 440 - client->remove(device); 423 + struct ib_client_data *found_context = NULL; 441 424 425 + down_write(&lists_rwsem); 442 426 spin_lock_irqsave(&device->client_data_lock, flags); 443 427 list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 444 428 if (context->client == client) { 445 - list_del(&context->list); 446 - kfree(context); 429 + context->going_down = true; 430 + found_context = context; 431 + break; 447 432 } 448 433 spin_unlock_irqrestore(&device->client_data_lock, flags); 434 + up_write(&lists_rwsem); 435 + 436 + if (client->remove) 437 + client->remove(device, found_context ? 438 + found_context->data : NULL); 439 + 440 + if (!found_context) { 441 + pr_warn("No client context found for %s/%s\n", 442 + device->name, client->name); 443 + continue; 444 + } 445 + 446 + down_write(&lists_rwsem); 447 + spin_lock_irqsave(&device->client_data_lock, flags); 448 + list_del(&found_context->list); 449 + kfree(found_context); 450 + spin_unlock_irqrestore(&device->client_data_lock, flags); 451 + up_write(&lists_rwsem); 449 452 } 450 453 451 454 mutex_unlock(&device_mutex);
+1 -1
drivers/infiniband/core/mad.c
··· 3335 3335 } 3336 3336 } 3337 3337 3338 - static void ib_mad_remove_device(struct ib_device *device) 3338 + static void ib_mad_remove_device(struct ib_device *device, void *client_data) 3339 3339 { 3340 3340 int i; 3341 3341
+3 -4
drivers/infiniband/core/multicast.c
··· 43 43 #include "sa.h" 44 44 45 45 static void mcast_add_one(struct ib_device *device); 46 - static void mcast_remove_one(struct ib_device *device); 46 + static void mcast_remove_one(struct ib_device *device, void *client_data); 47 47 48 48 static struct ib_client mcast_client = { 49 49 .name = "ib_multicast", ··· 840 840 ib_register_event_handler(&dev->event_handler); 841 841 } 842 842 843 - static void mcast_remove_one(struct ib_device *device) 843 + static void mcast_remove_one(struct ib_device *device, void *client_data) 844 844 { 845 - struct mcast_device *dev; 845 + struct mcast_device *dev = client_data; 846 846 struct mcast_port *port; 847 847 int i; 848 848 849 - dev = ib_get_client_data(device, &mcast_client); 850 849 if (!dev) 851 850 return; 852 851
+3 -3
drivers/infiniband/core/sa_query.c
··· 107 107 }; 108 108 109 109 static void ib_sa_add_one(struct ib_device *device); 110 - static void ib_sa_remove_one(struct ib_device *device); 110 + static void ib_sa_remove_one(struct ib_device *device, void *client_data); 111 111 112 112 static struct ib_client sa_client = { 113 113 .name = "sa", ··· 1221 1221 return; 1222 1222 } 1223 1223 1224 - static void ib_sa_remove_one(struct ib_device *device) 1224 + static void ib_sa_remove_one(struct ib_device *device, void *client_data) 1225 1225 { 1226 - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); 1226 + struct ib_sa_device *sa_dev = client_data; 1227 1227 int i; 1228 1228 1229 1229 if (!sa_dev)
+3 -3
drivers/infiniband/core/ucm.c
··· 109 109 #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) 110 110 111 111 static void ib_ucm_add_one(struct ib_device *device); 112 - static void ib_ucm_remove_one(struct ib_device *device); 112 + static void ib_ucm_remove_one(struct ib_device *device, void *client_data); 113 113 114 114 static struct ib_client ucm_client = { 115 115 .name = "ucm", ··· 1310 1310 return; 1311 1311 } 1312 1312 1313 - static void ib_ucm_remove_one(struct ib_device *device) 1313 + static void ib_ucm_remove_one(struct ib_device *device, void *client_data) 1314 1314 { 1315 - struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); 1315 + struct ib_ucm_device *ucm_dev = client_data; 1316 1316 1317 1317 if (!ucm_dev) 1318 1318 return;
+3 -3
drivers/infiniband/core/user_mad.c
··· 133 133 static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); 134 134 135 135 static void ib_umad_add_one(struct ib_device *device); 136 - static void ib_umad_remove_one(struct ib_device *device); 136 + static void ib_umad_remove_one(struct ib_device *device, void *client_data); 137 137 138 138 static void ib_umad_release_dev(struct kobject *kobj) 139 139 { ··· 1322 1322 kobject_put(&umad_dev->kobj); 1323 1323 } 1324 1324 1325 - static void ib_umad_remove_one(struct ib_device *device) 1325 + static void ib_umad_remove_one(struct ib_device *device, void *client_data) 1326 1326 { 1327 - struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client); 1327 + struct ib_umad_device *umad_dev = client_data; 1328 1328 int i; 1329 1329 1330 1330 if (!umad_dev)
+3 -3
drivers/infiniband/core/uverbs_main.c
··· 128 128 }; 129 129 130 130 static void ib_uverbs_add_one(struct ib_device *device); 131 - static void ib_uverbs_remove_one(struct ib_device *device); 131 + static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); 132 132 133 133 static void ib_uverbs_release_dev(struct kref *ref) 134 134 { ··· 948 948 return; 949 949 } 950 950 951 - static void ib_uverbs_remove_one(struct ib_device *device) 951 + static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) 952 952 { 953 - struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); 953 + struct ib_uverbs_device *uverbs_dev = client_data; 954 954 955 955 if (!uverbs_dev) 956 956 return;
+3 -4
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 89 89 struct ib_sa_client ipoib_sa_client; 90 90 91 91 static void ipoib_add_one(struct ib_device *device); 92 - static void ipoib_remove_one(struct ib_device *device); 92 + static void ipoib_remove_one(struct ib_device *device, void *client_data); 93 93 static void ipoib_neigh_reclaim(struct rcu_head *rp); 94 94 95 95 static struct ib_client ipoib_client = { ··· 1715 1715 ib_set_client_data(device, &ipoib_client, dev_list); 1716 1716 } 1717 1717 1718 - static void ipoib_remove_one(struct ib_device *device) 1718 + static void ipoib_remove_one(struct ib_device *device, void *client_data) 1719 1719 { 1720 1720 struct ipoib_dev_priv *priv, *tmp; 1721 - struct list_head *dev_list; 1721 + struct list_head *dev_list = client_data; 1722 1722 1723 - dev_list = ib_get_client_data(device, &ipoib_client); 1724 1723 if (!dev_list) 1725 1724 return; 1726 1725
+3 -3
drivers/infiniband/ulp/srp/ib_srp.c
··· 131 131 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); 132 132 133 133 static void srp_add_one(struct ib_device *device); 134 - static void srp_remove_one(struct ib_device *device); 134 + static void srp_remove_one(struct ib_device *device, void *client_data); 135 135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr); 136 136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr); 137 137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); ··· 3460 3460 kfree(dev_attr); 3461 3461 } 3462 3462 3463 - static void srp_remove_one(struct ib_device *device) 3463 + static void srp_remove_one(struct ib_device *device, void *client_data) 3464 3464 { 3465 3465 struct srp_device *srp_dev; 3466 3466 struct srp_host *host, *tmp_host; 3467 3467 struct srp_target_port *target; 3468 3468 3469 - srp_dev = ib_get_client_data(device, &srp_client); 3469 + srp_dev = client_data; 3470 3470 if (!srp_dev) 3471 3471 return; 3472 3472
+2 -3
drivers/infiniband/ulp/srpt/ib_srpt.c
··· 3326 3326 /** 3327 3327 * srpt_remove_one() - InfiniBand device removal callback function. 3328 3328 */ 3329 - static void srpt_remove_one(struct ib_device *device) 3329 + static void srpt_remove_one(struct ib_device *device, void *client_data) 3330 3330 { 3331 - struct srpt_device *sdev; 3331 + struct srpt_device *sdev = client_data; 3332 3332 int i; 3333 3333 3334 - sdev = ib_get_client_data(device, &srpt_client); 3335 3334 if (!sdev) { 3336 3335 pr_info("%s(%s): nothing to do.\n", __func__, device->name); 3337 3336 return;
+3 -1
include/rdma/ib_verbs.h
··· 1550 1550 1551 1551 spinlock_t client_data_lock; 1552 1552 struct list_head core_list; 1553 + /* Access to the client_data_list is protected by the client_data_lock 1554 + * spinlock and the lists_rwsem read-write semaphore */ 1553 1555 struct list_head client_data_list; 1554 1556 1555 1557 struct ib_cache cache; ··· 1763 1761 struct ib_client { 1764 1762 char *name; 1765 1763 void (*add) (struct ib_device *); 1766 - void (*remove)(struct ib_device *); 1764 + void (*remove)(struct ib_device *, void *client_data); 1767 1765 1768 1766 struct list_head list; 1769 1767 };
+2 -3
net/rds/ib.c
··· 230 230 * 231 231 * This can be called at any time and can be racing with any other RDS path. 232 232 */ 233 - static void rds_ib_remove_one(struct ib_device *device) 233 + static void rds_ib_remove_one(struct ib_device *device, void *client_data) 234 234 { 235 - struct rds_ib_device *rds_ibdev; 235 + struct rds_ib_device *rds_ibdev = client_data; 236 236 237 - rds_ibdev = ib_get_client_data(device, &rds_ib_client); 238 237 if (!rds_ibdev) 239 238 return; 240 239
+2 -3
net/rds/iw.c
··· 125 125 kfree(dev_attr); 126 126 } 127 127 128 - static void rds_iw_remove_one(struct ib_device *device) 128 + static void rds_iw_remove_one(struct ib_device *device, void *client_data) 129 129 { 130 - struct rds_iw_device *rds_iwdev; 130 + struct rds_iw_device *rds_iwdev = client_data; 131 131 struct rds_iw_cm_id *i_cm_id, *next; 132 132 133 - rds_iwdev = ib_get_client_data(device, &rds_iw_client); 134 133 if (!rds_iwdev) 135 134 return; 136 135