Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/rds: Keep track of and wait for FRWR segments in use upon shutdown

Since "rds_ib_free_frmr" and "rds_ib_free_frmr_list" simply put
the FRMR memory segments on the "drop_list" or "free_list",
and it is the job of "rds_ib_flush_mr_pool" to reap those entries
by ultimately issuing an "IB_WR_LOCAL_INV" work-request,
we need to trigger and then wait for all those memory segments
attached to a particular connection to be fully released before
we can move on to release the QP, CQ, etc.

So we make "rds_ib_conn_path_shutdown" wait on one more
atomic_t, "i_fastreg_inuse_count", which keeps track of how
many FRWR memory segments are still marked "FRMR_IS_INUSE"
(and we also wake_up rds_ib_ring_empty_wait as they go away).

Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Gerd Rausch and committed by
David S. Miller
3a2886cc 9547dff1

+45 -6
+1
net/rds/ib.h
··· 156 156 157 157 /* To control the number of wrs from fastreg */ 158 158 atomic_t i_fastreg_wrs; 159 + atomic_t i_fastreg_inuse_count; 159 160 160 161 /* interrupt handling */ 161 162 struct tasklet_struct i_send_tasklet;
+7
net/rds/ib_cm.c
··· 40 40 #include "rds_single_path.h" 41 41 #include "rds.h" 42 42 #include "ib.h" 43 + #include "ib_mr.h" 43 44 44 45 /* 45 46 * Set the selected protocol version ··· 994 993 ic->i_cm_id, err); 995 994 } 996 995 996 + /* kick off "flush_worker" for all pools in order to reap 997 + * all FRMR registrations that are still marked "FRMR_IS_INUSE" 998 + */ 999 + rds_ib_flush_mrs(); 1000 + 997 1001 /* 998 1002 * We want to wait for tx and rx completion to finish 999 1003 * before we tear down the connection, but we have to be ··· 1011 1005 wait_event(rds_ib_ring_empty_wait, 1012 1006 rds_ib_ring_empty(&ic->i_recv_ring) && 1013 1007 (atomic_read(&ic->i_signaled_sends) == 0) && 1008 + (atomic_read(&ic->i_fastreg_inuse_count) == 0) && 1014 1009 (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR)); 1015 1010 tasklet_kill(&ic->i_send_tasklet); 1016 1011 tasklet_kill(&ic->i_recv_tasklet);
+37 -6
net/rds/ib_frmr.c
··· 32 32 33 33 #include "ib_mr.h" 34 34 35 + static inline void 36 + rds_transition_frwr_state(struct rds_ib_mr *ibmr, 37 + enum rds_ib_fr_state old_state, 38 + enum rds_ib_fr_state new_state) 39 + { 40 + if (cmpxchg(&ibmr->u.frmr.fr_state, 41 + old_state, new_state) == old_state && 42 + old_state == FRMR_IS_INUSE) { 43 + /* enforce order of ibmr->u.frmr.fr_state update 44 + * before decrementing i_fastreg_inuse_count 45 + */ 46 + smp_mb__before_atomic(); 47 + atomic_dec(&ibmr->ic->i_fastreg_inuse_count); 48 + if (waitqueue_active(&rds_ib_ring_empty_wait)) 49 + wake_up(&rds_ib_ring_empty_wait); 50 + } 51 + } 52 + 35 53 static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, 36 54 int npages) 37 55 { ··· 136 118 if (unlikely(ret != ibmr->sg_len)) 137 119 return ret < 0 ? ret : -EINVAL; 138 120 121 + if (cmpxchg(&frmr->fr_state, 122 + FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE) 123 + return -EBUSY; 124 + 125 + atomic_inc(&ibmr->ic->i_fastreg_inuse_count); 126 + 139 127 /* Perform a WR for the fast_reg_mr. Each individual page 140 128 * in the sg list is added to the fast reg page list and placed 141 129 * inside the fast_reg_mr WR. The key used is a rolling 8bit 142 130 * counter, which should guarantee uniqueness. 
143 131 */ 144 132 ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++); 145 - frmr->fr_state = FRMR_IS_INUSE; 146 133 frmr->fr_reg = true; 147 134 148 135 memset(&reg_wr, 0, sizeof(reg_wr)); ··· 164 141 ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL); 165 142 if (unlikely(ret)) { 166 143 /* Failure here can be because of -ENOMEM as well */ 167 - frmr->fr_state = FRMR_IS_STALE; 144 + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); 145 + 168 146 atomic_inc(&ibmr->ic->i_fastreg_wrs); 169 147 if (printk_ratelimit()) 170 148 pr_warn("RDS/IB: %s returned error(%d)\n", ··· 292 268 293 269 ret = ib_post_send(i_cm_id->qp, s_wr, NULL); 294 270 if (unlikely(ret)) { 295 - frmr->fr_state = FRMR_IS_STALE; 271 + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); 296 272 frmr->fr_inv = false; 273 + /* enforce order of frmr->fr_inv update 274 + * before incrementing i_fastreg_wrs 275 + */ 276 + smp_mb__before_atomic(); 297 277 atomic_inc(&ibmr->ic->i_fastreg_wrs); 298 278 pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret); 299 279 goto out; ··· 325 297 struct rds_ib_frmr *frmr = &ibmr->u.frmr; 326 298 327 299 if (wc->status != IB_WC_SUCCESS) { 328 - frmr->fr_state = FRMR_IS_STALE; 300 + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); 329 301 if (rds_conn_up(ic->conn)) 330 302 rds_ib_conn_error(ic->conn, 331 303 "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n", ··· 337 309 } 338 310 339 311 if (frmr->fr_inv) { 340 - if (frmr->fr_state == FRMR_IS_INUSE) 341 - frmr->fr_state = FRMR_IS_FREE; 312 + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE); 342 313 frmr->fr_inv = false; 343 314 wake_up(&frmr->fr_inv_done); 344 315 } ··· 347 320 wake_up(&frmr->fr_reg_done); 348 321 } 349 322 323 + /* enforce order of frmr->{fr_reg,fr_inv} update 324 + * before incrementing i_fastreg_wrs 325 + */ 326 + smp_mb__before_atomic(); 350 327 atomic_inc(&ic->i_fastreg_wrs); 351 328 } 352 
329