Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/rds: Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition after posting IB_WR_LOCAL_INV

In order to:
1) avoid a silly bouncing between "clean_list" and "drop_list"
triggered by function "rds_ib_reg_frmr" as it releases frmr
regions whose state is not "FRMR_IS_FREE" right away.

2) prevent an invalid access error caused by a race between a pending
"IB_WR_LOCAL_INV" operation and the teardown ("dma_unmap_sg", "put_page")
and de-registration ("ib_dereg_mr") of the corresponding
memory region.

Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Gerd Rausch and committed by
David S. Miller
5f33141d c9467447

+40 -27
+38 -27
net/rds/ib_frmr.c
··· 76 76 77 77 frmr->fr_state = FRMR_IS_FREE; 78 78 init_waitqueue_head(&frmr->fr_inv_done); 79 + init_waitqueue_head(&frmr->fr_reg_done); 79 80 return ibmr; 80 81 81 82 out_no_cigar: ··· 125 124 */ 126 125 ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++); 127 126 frmr->fr_state = FRMR_IS_INUSE; 127 + frmr->fr_reg = true; 128 128 129 129 memset(&reg_wr, 0, sizeof(reg_wr)); 130 130 reg_wr.wr.wr_id = (unsigned long)(void *)ibmr; ··· 146 144 if (printk_ratelimit()) 147 145 pr_warn("RDS/IB: %s returned error(%d)\n", 148 146 __func__, ret); 147 + goto out; 149 148 } 149 + 150 + /* Wait for the registration to complete in order to prevent an invalid 151 + * access error resulting from a race between the memory region already 152 + * being accessed while registration is still pending. 153 + */ 154 + wait_event(frmr->fr_reg_done, !frmr->fr_reg); 155 + 156 + out: 157 + 150 158 return ret; 151 159 } 152 160 ··· 274 262 pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret); 275 263 goto out; 276 264 } 265 + 266 + /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to 267 + * 1) avoid a silly bouncing between "clean_list" and "drop_list" 268 + * triggered by function "rds_ib_reg_frmr" as it is releases frmr 269 + * regions whose state is not "FRMR_IS_FREE" right away. 270 + * 2) prevents an invalid access error in a race 271 + * from a pending "IB_WR_LOCAL_INV" operation 272 + * with a teardown ("dma_unmap_sg", "put_page") 273 + * and de-registration ("ib_dereg_mr") of the corresponding 274 + * memory region. 
275 + */ 276 + wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE); 277 + 277 278 out: 278 279 return ret; 279 280 } ··· 314 289 wake_up(&frmr->fr_inv_done); 315 290 } 316 291 292 + if (frmr->fr_reg) { 293 + frmr->fr_reg = false; 294 + wake_up(&frmr->fr_reg_done); 295 + } 296 + 317 297 atomic_inc(&ic->i_fastreg_wrs); 318 298 } 319 299 ··· 327 297 { 328 298 struct rds_ib_mr *ibmr, *next; 329 299 struct rds_ib_frmr *frmr; 330 - int ret = 0; 300 + int ret = 0, ret2; 331 301 unsigned int freed = *nfreed; 332 302 333 303 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 334 304 list_for_each_entry(ibmr, list, unmap_list) { 335 - if (ibmr->sg_dma_len) 336 - ret |= rds_ib_post_inv(ibmr); 305 + if (ibmr->sg_dma_len) { 306 + ret2 = rds_ib_post_inv(ibmr); 307 + if (ret2 && !ret) 308 + ret = ret2; 309 + } 337 310 } 311 + 338 312 if (ret) 339 313 pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret); 340 314 ··· 381 347 } 382 348 383 349 do { 384 - if (ibmr) { 385 - /* Memory regions make it onto the "clean_list" via 386 - * "rds_ib_flush_mr_pool", after the memory region has 387 - * been posted for invalidation via "rds_ib_post_inv". 388 - * 389 - * At that point in time, "fr_state" may still be 390 - * in state "FRMR_IS_INUSE", since the only place where 391 - * "fr_state" transitions to "FRMR_IS_FREE" is in 392 - * is in "rds_ib_mr_cqe_handler", which is 393 - * triggered by a tasklet. 394 - * 395 - * So we wait for "fr_inv_done" to trigger 396 - * and only put memory regions onto the drop_list 397 - * that failed (i.e. not marked "FRMR_IS_FREE"). 398 - * 399 - * This avoids the problem of memory-regions bouncing 400 - * between "clean_list" and "drop_list" before they 401 - * even have a chance to be properly invalidated. 
402 - */ 403 - frmr = &ibmr->u.frmr; 404 - wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE); 405 - if (frmr->fr_state == FRMR_IS_FREE) 406 - break; 350 + if (ibmr) 407 351 rds_ib_free_frmr(ibmr, true); 408 - } 409 352 ibmr = rds_ib_alloc_frmr(rds_ibdev, nents); 410 353 if (IS_ERR(ibmr)) 411 354 return ibmr;
+2
net/rds/ib_mr.h
··· 58 58 enum rds_ib_fr_state fr_state; 59 59 bool fr_inv; 60 60 wait_queue_head_t fr_inv_done; 61 + bool fr_reg; 62 + wait_queue_head_t fr_reg_done; 61 63 struct ib_send_wr fr_wr; 62 64 unsigned int dma_npages; 63 65 unsigned int sg_byte_len;