Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/core: Postpone uobject cleanup on failure till FD close

Remove the ib_is_destroy_retryable() concept.

The idea here was to allow the drivers to forcibly clean the HW object
even if they otherwise didn't want to (e.g. because of usecnt). This was an
attempt to clean up in a world where drivers were not allowed to fail HW
object destruction.

Now that we are going back to allowing HW objects to fail destroy this
doesn't make sense. Instead if a uobject's HW object can't be destroyed it
is left on the uobject list and it is up to uverbs_destroy_ufile_hw() to
clean it. Multiple passes over the uobject list allow hidden dependencies
to be resolved. If that fails the HW driver is broken, throw a WARN_ON and
leak the HW object memory.

All the other tricky failure paths (e.g. on creation error unwind) have
already been updated to this new model.

Link: https://lore.kernel.org/r/20201104144556.3809085-2-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

authored by

Leon Romanovsky and committed by
Jason Gunthorpe
efa968ee f7a95c90

+45 -113
+17 -34
drivers/infiniband/core/rdma_core.c
··· 137 137 } else if (uobj->object) { 138 138 ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, 139 139 attrs); 140 - if (ret) { 141 - if (ib_is_destroy_retryable(ret, reason, uobj)) 142 - return ret; 143 - 144 - /* Nothing to be done, dangle the memory and move on */ 145 - WARN(true, 146 - "ib_uverbs: failed to remove uobject id %d, driver err=%d", 147 - uobj->id, ret); 148 - } 140 + if (ret) 141 + /* Nothing to be done, wait till ucontext will clean it */ 142 + return ret; 149 143 150 144 uobj->object = NULL; 151 145 } ··· 537 543 struct uverbs_obj_idr_type, type); 538 544 int ret = idr_type->destroy_object(uobj, why, attrs); 539 545 540 - /* 541 - * We can only fail gracefully if the user requested to destroy the 542 - * object or when a retry may be called upon an error. 543 - * In the rest of the cases, just remove whatever you can. 544 - */ 545 - if (ib_is_destroy_retryable(ret, why, uobj)) 546 + if (ret) 546 547 return ret; 547 548 548 549 if (why == RDMA_REMOVE_ABORT) ··· 570 581 { 571 582 const struct uverbs_obj_fd_type *fd_type = container_of( 572 583 uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); 573 - int ret = fd_type->destroy_object(uobj, why); 574 584 575 - if (ib_is_destroy_retryable(ret, why, uobj)) 576 - return ret; 577 - 578 - return 0; 585 + return fd_type->destroy_object(uobj, why); 579 586 } 580 587 581 588 static void remove_handle_fd_uobject(struct ib_uobject *uobj) ··· 848 863 * racing with a lookup_get. 
849 864 */ 850 865 WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); 866 + if (reason == RDMA_REMOVE_DRIVER_FAILURE) 867 + obj->object = NULL; 851 868 if (!uverbs_destroy_uobject(obj, reason, &attrs)) 852 869 ret = 0; 853 870 else 854 871 atomic_set(&obj->usecnt, 0); 872 + } 873 + 874 + if (reason == RDMA_REMOVE_DRIVER_FAILURE) { 875 + WARN_ON(!list_empty(&ufile->uobjects)); 876 + return 0; 855 877 } 856 878 return ret; 857 879 } ··· 881 889 if (!ufile->ucontext) 882 890 goto done; 883 891 884 - ufile->ucontext->cleanup_retryable = true; 885 - while (!list_empty(&ufile->uobjects)) 886 - if (__uverbs_cleanup_ufile(ufile, reason)) { 887 - /* 888 - * No entry was cleaned-up successfully during this 889 - * iteration. It is a driver bug to fail destruction. 890 - */ 891 - WARN_ON(!list_empty(&ufile->uobjects)); 892 - break; 893 - } 892 + while (!list_empty(&ufile->uobjects) && 893 + !__uverbs_cleanup_ufile(ufile, reason)) { 894 + } 894 895 895 - ufile->ucontext->cleanup_retryable = false; 896 - if (!list_empty(&ufile->uobjects)) 897 - __uverbs_cleanup_ufile(ufile, reason); 898 - 896 + if (WARN_ON(!list_empty(&ufile->uobjects))) 897 + __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE); 899 898 ufile_destroy_ucontext(ufile, reason); 900 899 901 900 done:
+2 -3
drivers/infiniband/core/uverbs_cmd.c
··· 681 681 return 0; 682 682 683 683 ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata); 684 - 685 - if (ib_is_destroy_retryable(ret, why, uobject)) { 684 + if (ret) { 686 685 atomic_inc(&xrcd->usecnt); 687 686 return ret; 688 687 } ··· 689 690 if (inode) 690 691 xrcd_table_delete(dev, inode); 691 692 692 - return ret; 693 + return 0; 693 694 } 694 695 695 696 static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
+6 -9
drivers/infiniband/core/uverbs_std_types.c
··· 88 88 return -EBUSY; 89 89 90 90 ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl); 91 - if (ib_is_destroy_retryable(ret, why, uobject)) 91 + if (ret) 92 92 return ret; 93 93 94 94 for (i = 0; i < table_size; i++) ··· 96 96 97 97 kfree(rwq_ind_tbl); 98 98 kfree(ind_tbl); 99 - return ret; 99 + return 0; 100 100 } 101 101 102 102 static int uverbs_free_xrcd(struct ib_uobject *uobject, ··· 108 108 container_of(uobject, struct ib_uxrcd_object, uobject); 109 109 int ret; 110 110 111 - ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); 112 - if (ret) 113 - return ret; 111 + if (atomic_read(&uxrcd->refcnt)) 112 + return -EBUSY; 114 113 115 114 mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); 116 115 ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); ··· 123 124 struct uverbs_attr_bundle *attrs) 124 125 { 125 126 struct ib_pd *pd = uobject->object; 126 - int ret; 127 127 128 - ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); 129 - if (ret) 130 - return ret; 128 + if (atomic_read(&pd->usecnt)) 129 + return -EBUSY; 131 130 132 131 return ib_dealloc_pd_user(pd, &attrs->driver_udata); 133 132 }
+2 -3
drivers/infiniband/core/uverbs_std_types_counters.c
··· 42 42 struct ib_counters *counters = uobject->object; 43 43 int ret; 44 44 45 - ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); 46 - if (ret) 47 - return ret; 45 + if (atomic_read(&counters->usecnt)) 46 + return -EBUSY; 48 47 49 48 ret = counters->device->ops.destroy_counters(counters); 50 49 if (ret)
+2 -2
drivers/infiniband/core/uverbs_std_types_cq.c
··· 46 46 int ret; 47 47 48 48 ret = ib_destroy_cq_user(cq, &attrs->driver_udata); 49 - if (ib_is_destroy_retryable(ret, why, uobject)) 49 + if (ret) 50 50 return ret; 51 51 52 52 ib_uverbs_release_ucq( ··· 55 55 ev_queue) : 56 56 NULL, 57 57 ucq); 58 - return ret; 58 + return 0; 59 59 } 60 60 61 61 static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
+2 -4
drivers/infiniband/core/uverbs_std_types_dm.c
··· 39 39 struct uverbs_attr_bundle *attrs) 40 40 { 41 41 struct ib_dm *dm = uobject->object; 42 - int ret; 43 42 44 - ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); 45 - if (ret) 46 - return ret; 43 + if (atomic_read(&dm->usecnt)) 44 + return -EBUSY; 47 45 48 46 return dm->device->ops.dealloc_dm(dm, attrs); 49 47 }
+2 -4
drivers/infiniband/core/uverbs_std_types_flow_action.c
··· 39 39 struct uverbs_attr_bundle *attrs) 40 40 { 41 41 struct ib_flow_action *action = uobject->object; 42 - int ret; 43 42 44 - ret = ib_destroy_usecnt(&action->usecnt, why, uobject); 45 - if (ret) 46 - return ret; 43 + if (atomic_read(&action->usecnt)) 44 + return -EBUSY; 47 45 48 46 return action->device->ops.destroy_flow_action(action); 49 47 }
+2 -2
drivers/infiniband/core/uverbs_std_types_qp.c
··· 32 32 } 33 33 34 34 ret = ib_destroy_qp_user(qp, &attrs->driver_udata); 35 - if (ib_is_destroy_retryable(ret, why, uobject)) 35 + if (ret) 36 36 return ret; 37 37 38 38 if (uqp->uxrcd) 39 39 atomic_dec(&uqp->uxrcd->refcnt); 40 40 41 41 ib_uverbs_release_uevent(&uqp->uevent); 42 - return ret; 42 + return 0; 43 43 } 44 44 45 45 static int check_creation_flags(enum ib_qp_type qp_type,
+2 -2
drivers/infiniband/core/uverbs_std_types_srq.c
··· 18 18 int ret; 19 19 20 20 ret = ib_destroy_srq_user(srq, &attrs->driver_udata); 21 - if (ib_is_destroy_retryable(ret, why, uobject)) 21 + if (ret) 22 22 return ret; 23 23 24 24 if (srq_type == IB_SRQT_XRC) { ··· 30 30 } 31 31 32 32 ib_uverbs_release_uevent(uevent); 33 - return ret; 33 + return 0; 34 34 } 35 35 36 36 static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)(
+2 -2
drivers/infiniband/core/uverbs_std_types_wq.c
··· 17 17 int ret; 18 18 19 19 ret = ib_destroy_wq_user(wq, &attrs->driver_udata); 20 - if (ib_is_destroy_retryable(ret, why, uobject)) 20 + if (ret) 21 21 return ret; 22 22 23 23 ib_uverbs_release_uevent(&uwq->uevent); 24 - return ret; 24 + return 0; 25 25 } 26 26 27 27 static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)(
+2 -2
drivers/infiniband/hw/mlx5/devx.c
··· 1310 1310 else 1311 1311 ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, 1312 1312 obj->dinlen, out, sizeof(out)); 1313 - if (ib_is_destroy_retryable(ret, why, uobject)) 1313 + if (ret) 1314 1314 return ret; 1315 1315 1316 1316 devx_event_table = &dev->devx_event_table; ··· 2181 2181 int err; 2182 2182 2183 2183 err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); 2184 - if (ib_is_destroy_retryable(err, why, uobject)) 2184 + if (err) 2185 2185 return err; 2186 2186 2187 2187 ib_umem_release(obj->umem);
+2 -4
drivers/infiniband/hw/mlx5/fs.c
··· 2035 2035 struct uverbs_attr_bundle *attrs) 2036 2036 { 2037 2037 struct mlx5_ib_flow_matcher *obj = uobject->object; 2038 - int ret; 2039 2038 2040 - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); 2041 - if (ret) 2042 - return ret; 2039 + if (atomic_read(&obj->usecnt)) 2040 + return -EBUSY; 2043 2041 2044 2042 kfree(obj); 2045 2043 return 0;
+2 -42
include/rdma/ib_verbs.h
··· 1471 1471 RDMA_REMOVE_DRIVER_REMOVE, 1472 1472 /* uobj is being cleaned-up before being committed */ 1473 1473 RDMA_REMOVE_ABORT, 1474 + /* The driver failed to destroy the uobject and is being disconnected */ 1475 + RDMA_REMOVE_DRIVER_FAILURE, 1474 1476 }; 1475 1477 1476 1478 struct ib_rdmacg_object { ··· 1484 1482 struct ib_ucontext { 1485 1483 struct ib_device *device; 1486 1484 struct ib_uverbs_file *ufile; 1487 - 1488 - bool cleanup_retryable; 1489 1485 1490 1486 struct ib_rdmacg_object cg_obj; 1491 1487 /* ··· 2901 2901 size_t len) 2902 2902 { 2903 2903 return ib_is_buffer_cleared(udata->inbuf + offset, len); 2904 - } 2905 - 2906 - /** 2907 - * ib_is_destroy_retryable - Check whether the uobject destruction 2908 - * is retryable. 2909 - * @ret: The initial destruction return code 2910 - * @why: remove reason 2911 - * @uobj: The uobject that is destroyed 2912 - * 2913 - * This function is a helper function that IB layer and low-level drivers 2914 - * can use to consider whether the destruction of the given uobject is 2915 - * retry-able. 2916 - * It checks the original return code, if it wasn't success the destruction 2917 - * is retryable according to the ucontext state (i.e. cleanup_retryable) and 2918 - * the remove reason. (i.e. why). 2919 - * Must be called with the object locked for destroy. 2920 - */ 2921 - static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, 2922 - struct ib_uobject *uobj) 2923 - { 2924 - return ret && (why == RDMA_REMOVE_DESTROY || 2925 - uobj->context->cleanup_retryable); 2926 - } 2927 - 2928 - /** 2929 - * ib_destroy_usecnt - Called during destruction to check the usecnt 2930 - * @usecnt: The usecnt atomic 2931 - * @why: remove reason 2932 - * @uobj: The uobject that is destroyed 2933 - * 2934 - * Non-zero usecnts will block destruction unless destruction was triggered by 2935 - * a ucontext cleanup. 
2936 - */ 2937 - static inline int ib_destroy_usecnt(atomic_t *usecnt, 2938 - enum rdma_remove_reason why, 2939 - struct ib_uobject *uobj) 2940 - { 2941 - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) 2942 - return -EBUSY; 2943 - return 0; 2944 2904 } 2945 2905 2946 2906 /**