Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rpcrdma: Implement generic device removal

Commit e87a911fed07 ("nvme-rdma: use ib_client API to detect device
removal") explains the benefits of handling device removal outside
of the CM event handler.

Sketch in an IB device removal notification mechanism that can be
used by both the client and server side RPC-over-RDMA transport
implementations.

Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

authored by

Chuck Lever and committed by
Anna Schumaker
7e86845a acd9f2dd

+258 -4
+27
include/linux/sunrpc/rdma_rn.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * * Copyright (c) 2024, Oracle and/or its affiliates. 4 + */ 5 + 6 + #ifndef _LINUX_SUNRPC_RDMA_RN_H 7 + #define _LINUX_SUNRPC_RDMA_RN_H 8 + 9 + #include <rdma/ib_verbs.h> 10 + 11 + /** 12 + * rpcrdma_notification - request removal notification 13 + */ 14 + struct rpcrdma_notification { 15 + void (*rn_done)(struct rpcrdma_notification *rn); 16 + u32 rn_index; 17 + }; 18 + 19 + int rpcrdma_rn_register(struct ib_device *device, 20 + struct rpcrdma_notification *rn, 21 + void (*done)(struct rpcrdma_notification *rn)); 22 + void rpcrdma_rn_unregister(struct ib_device *device, 23 + struct rpcrdma_notification *rn); 24 + int rpcrdma_ib_client_register(void); 25 + void rpcrdma_ib_client_unregister(void); 26 + 27 + #endif /* _LINUX_SUNRPC_RDMA_RN_H */
+34
include/trace/events/rpcrdma.h
··· 2220 2220 ) 2221 2221 ); 2222 2222 2223 + DECLARE_EVENT_CLASS(rpcrdma_client_device_class, 2224 + TP_PROTO( 2225 + const struct ib_device *device 2226 + ), 2227 + 2228 + TP_ARGS(device), 2229 + 2230 + TP_STRUCT__entry( 2231 + __string(name, device->name) 2232 + ), 2233 + 2234 + TP_fast_assign( 2235 + __assign_str(name); 2236 + ), 2237 + 2238 + TP_printk("device=%s", 2239 + __get_str(name) 2240 + ) 2241 + ); 2242 + 2243 + #define DEFINE_CLIENT_DEVICE_EVENT(name) \ 2244 + DEFINE_EVENT(rpcrdma_client_device_class, name, \ 2245 + TP_PROTO( \ 2246 + const struct ib_device *device \ 2247 + ), \ 2248 + TP_ARGS(device) \ 2249 + ) 2250 + 2251 + DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_completion); 2252 + DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_add_one); 2253 + DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one); 2254 + DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on); 2255 + DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done); 2256 + 2223 2257 #endif /* _TRACE_RPCRDMA_H */ 2224 2258 2225 2259 #include <trace/define_trace.h>
+1 -1
net/sunrpc/xprtrdma/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o 3 3 4 - rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \ 4 + rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \ 5 5 svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ 6 6 svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \ 7 7 svc_rdma_pcl.o module.o
+181
net/sunrpc/xprtrdma/ib_client.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + /* 3 + * Copyright (c) 2024 Oracle. All rights reserved. 4 + */ 5 + 6 + /* #include <linux/module.h> 7 + #include <linux/slab.h> */ 8 + #include <linux/xarray.h> 9 + #include <linux/types.h> 10 + #include <linux/kref.h> 11 + #include <linux/completion.h> 12 + 13 + #include <linux/sunrpc/svc_rdma.h> 14 + #include <linux/sunrpc/rdma_rn.h> 15 + 16 + #include "xprt_rdma.h" 17 + #include <trace/events/rpcrdma.h> 18 + 19 + /* Per-ib_device private data for rpcrdma */ 20 + struct rpcrdma_device { 21 + struct kref rd_kref; 22 + unsigned long rd_flags; 23 + struct ib_device *rd_device; 24 + struct xarray rd_xa; 25 + struct completion rd_done; 26 + }; 27 + 28 + #define RPCRDMA_RD_F_REMOVING (0) 29 + 30 + static struct ib_client rpcrdma_ib_client; 31 + 32 + /* 33 + * Listeners have no associated device, so we never register them. 34 + * Note that ib_get_client_data() does not check if @device is 35 + * NULL for us. 36 + */ 37 + static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device) 38 + { 39 + if (!device) 40 + return NULL; 41 + return ib_get_client_data(device, &rpcrdma_ib_client); 42 + } 43 + 44 + /** 45 + * rpcrdma_rn_register - register to get device removal notifications 46 + * @device: device to monitor 47 + * @rn: notification object that wishes to be notified 48 + * @done: callback to notify caller of device removal 49 + * 50 + * Returns zero on success. The callback in rn_done is guaranteed 51 + * to be invoked when the device is removed, unless this notification 52 + * is unregistered first. 53 + * 54 + * On failure, a negative errno is returned. 55 + */ 56 + int rpcrdma_rn_register(struct ib_device *device, 57 + struct rpcrdma_notification *rn, 58 + void (*done)(struct rpcrdma_notification *rn)) 59 + { 60 + struct rpcrdma_device *rd = rpcrdma_get_client_data(device); 61 + 62 + if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags)) 63 + return -ENETUNREACH; 64 + 65 + kref_get(&rd->rd_kref); 66 + if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0) 67 + return -ENOMEM; 68 + rn->rn_done = done; 69 + return 0; 70 + } 71 + 72 + static void rpcrdma_rn_release(struct kref *kref) 73 + { 74 + struct rpcrdma_device *rd = container_of(kref, struct rpcrdma_device, 75 + rd_kref); 76 + 77 + trace_rpcrdma_client_completion(rd->rd_device); 78 + complete(&rd->rd_done); 79 + } 80 + 81 + /** 82 + * rpcrdma_rn_unregister - stop device removal notifications 83 + * @device: monitored device 84 + * @rn: notification object that no longer wishes to be notified 85 + */ 86 + void rpcrdma_rn_unregister(struct ib_device *device, 87 + struct rpcrdma_notification *rn) 88 + { 89 + struct rpcrdma_device *rd = rpcrdma_get_client_data(device); 90 + 91 + if (!rd) 92 + return; 93 + 94 + xa_erase(&rd->rd_xa, rn->rn_index); 95 + kref_put(&rd->rd_kref, rpcrdma_rn_release); 96 + } 97 + 98 + /** 99 + * rpcrdma_add_one - ib_client device insertion callback 100 + * @device: device about to be inserted 101 + * 102 + * Returns zero on success. xprtrdma private data has been allocated 103 + * for this device. On failure, a negative errno is returned. 104 + */ 105 + static int rpcrdma_add_one(struct ib_device *device) 106 + { 107 + struct rpcrdma_device *rd; 108 + 109 + rd = kzalloc(sizeof(*rd), GFP_KERNEL); 110 + if (!rd) 111 + return -ENOMEM; 112 + 113 + kref_init(&rd->rd_kref); 114 + xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1); 115 + rd->rd_device = device; 116 + init_completion(&rd->rd_done); 117 + ib_set_client_data(device, &rpcrdma_ib_client, rd); 118 + 119 + trace_rpcrdma_client_add_one(device); 120 + return 0; 121 + } 122 + 123 + /** 124 + * rpcrdma_remove_one - ib_client device removal callback 125 + * @device: device about to be removed 126 + * @client_data: this module's private per-device data 127 + * 128 + * Upon return, all transports associated with @device have divested 129 + * themselves from IB hardware resources. 130 + */ 131 + static void rpcrdma_remove_one(struct ib_device *device, 132 + void *client_data) 133 + { 134 + struct rpcrdma_device *rd = client_data; 135 + struct rpcrdma_notification *rn; 136 + unsigned long index; 137 + 138 + trace_rpcrdma_client_remove_one(device); 139 + 140 + set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags); 141 + xa_for_each(&rd->rd_xa, index, rn) 142 + rn->rn_done(rn); 143 + 144 + /* 145 + * Wait only if there are still outstanding notification 146 + * registrants for this device. 147 + */ 148 + if (!refcount_dec_and_test(&rd->rd_kref.refcount)) { 149 + trace_rpcrdma_client_wait_on(device); 150 + wait_for_completion(&rd->rd_done); 151 + } 152 + 153 + trace_rpcrdma_client_remove_one_done(device); 154 + kfree(rd); 155 + } 156 + 157 + static struct ib_client rpcrdma_ib_client = { 158 + .name = "rpcrdma", 159 + .add = rpcrdma_add_one, 160 + .remove = rpcrdma_remove_one, 161 + }; 162 + 163 + /** 164 + * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma 165 + * 166 + * cel: watch for orphaned rpcrdma_device objects on module unload 167 + */ 168 + void rpcrdma_ib_client_unregister(void) 169 + { 170 + ib_unregister_client(&rpcrdma_ib_client); 171 + } 172 + 173 + /** 174 + * rpcrdma_ib_client_register - register ib_client for rpcrdma 175 + * 176 + * Returns zero on success, or a negative errno. 177 + */ 178 + int rpcrdma_ib_client_register(void) 179 + { 180 + return ib_register_client(&rpcrdma_ib_client); 181 + }
+15 -3
net/sunrpc/xprtrdma/module.c
··· 11 11 #include <linux/module.h> 12 12 #include <linux/init.h> 13 13 #include <linux/sunrpc/svc_rdma.h> 14 + #include <linux/sunrpc/rdma_rn.h> 14 15 15 16 #include <asm/swab.h> 16 17 ··· 31 30 { 32 31 xprt_rdma_cleanup(); 33 32 svc_rdma_cleanup(); 33 + rpcrdma_ib_client_unregister(); 34 34 } 35 35 36 36 static int __init rpc_rdma_init(void) 37 37 { 38 38 int rc; 39 39 40 + rc = rpcrdma_ib_client_register(); 41 + if (rc) 42 + goto out_rc; 43 + 40 44 rc = svc_rdma_init(); 41 45 if (rc) 42 - goto out; 46 + goto out_ib_client; 43 47 44 48 rc = xprt_rdma_init(); 45 49 if (rc) 46 - svc_rdma_cleanup(); 50 + goto out_svc_rdma; 47 51 48 - out: 52 + return 0; 53 + 54 + out_svc_rdma: 55 + svc_rdma_cleanup(); 56 + out_ib_client: 57 + rpcrdma_ib_client_unregister(); 58 + out_rc: 49 59 return rc; 50 60 } 51 61