Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lightnvm: pblk: remove target using async. I/Os

When removing a pblk instance, pad the current line using asynchronous
I/O. This reduces the removal time from ~1 minute in the worst case to a
couple of seconds.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Javier González; committed by Jens Axboe.
ee8d5c1a de54e703

+122 -73
+4 -1
drivers/lightnvm/pblk-core.c
··· 273 273 { 274 274 pblk_rb_flush(&pblk->rwb); 275 275 do { 276 - if (!pblk_rb_read_count(&pblk->rwb)) 276 + if (!pblk_rb_sync_count(&pblk->rwb)) 277 277 break; 278 278 279 + pblk_write_kick(pblk); 279 280 schedule(); 280 281 } while (1); 281 282 } ··· 1351 1350 return; 1352 1351 } 1353 1352 1353 + flush_workqueue(pblk->bb_wq); 1354 1354 pblk_line_close_meta_sync(pblk); 1355 1355 1356 1356 spin_lock(&l_mg->free_lock); ··· 1549 1547 } 1550 1548 1551 1549 pblk_wait_for_meta(pblk); 1550 + flush_workqueue(pblk->close_wq); 1552 1551 } 1553 1552 1554 1553 static void pblk_line_should_sync_meta(struct pblk *pblk)
+9
drivers/lightnvm/pblk-init.c
··· 841 841 842 842 static void pblk_writer_stop(struct pblk *pblk) 843 843 { 844 + /* The pipeline must be stopped and the write buffer emptied before the 845 + * write thread is stopped 846 + */ 847 + WARN(pblk_rb_read_count(&pblk->rwb), 848 + "Stopping not fully persisted write buffer\n"); 849 + 850 + WARN(pblk_rb_sync_count(&pblk->rwb), 851 + "Stopping not fully synced write buffer\n"); 852 + 844 853 if (pblk->writer_ts) 845 854 kthread_stop(pblk->writer_ts); 846 855 del_timer(&pblk->wtimer);
+8
drivers/lightnvm/pblk-rb.c
··· 180 180 return pblk_rb_ring_count(mem, subm, rb->nr_entries); 181 181 } 182 182 183 + unsigned int pblk_rb_sync_count(struct pblk_rb *rb) 184 + { 185 + unsigned int mem = READ_ONCE(rb->mem); 186 + unsigned int sync = READ_ONCE(rb->sync); 187 + 188 + return pblk_rb_ring_count(mem, sync, rb->nr_entries); 189 + } 190 + 183 191 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) 184 192 { 185 193 unsigned int subm;
+92 -71
drivers/lightnvm/pblk-recovery.c
··· 327 327 return 0; 328 328 } 329 329 330 + static void pblk_recov_complete(struct kref *ref) 331 + { 332 + struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref); 333 + 334 + complete(&pad_rq->wait); 335 + } 336 + 337 + static void pblk_end_io_recov(struct nvm_rq *rqd) 338 + { 339 + struct pblk_pad_rq *pad_rq = rqd->private; 340 + struct pblk *pblk = pad_rq->pblk; 341 + struct nvm_tgt_dev *dev = pblk->dev; 342 + 343 + kref_put(&pad_rq->ref, pblk_recov_complete); 344 + nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); 345 + pblk_free_rqd(pblk, rqd, WRITE); 346 + } 347 + 330 348 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, 331 - struct pblk_recov_alloc p, int left_ppas) 349 + int left_ppas) 332 350 { 333 351 struct nvm_tgt_dev *dev = pblk->dev; 334 352 struct nvm_geo *geo = &dev->geo; 335 353 struct ppa_addr *ppa_list; 336 354 struct pblk_sec_meta *meta_list; 355 + struct pblk_pad_rq *pad_rq; 337 356 struct nvm_rq *rqd; 338 357 struct bio *bio; 339 358 void *data; 340 359 dma_addr_t dma_ppa_list, dma_meta_list; 341 360 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); 342 361 u64 w_ptr = line->cur_sec; 343 - int left_line_ppas = line->left_msecs; 344 - int rq_ppas, rq_len; 362 + int left_line_ppas, rq_ppas, rq_len; 345 363 int i, j; 346 364 int ret = 0; 347 - DECLARE_COMPLETION_ONSTACK(wait); 348 365 349 - ppa_list = p.ppa_list; 350 - meta_list = p.meta_list; 351 - rqd = p.rqd; 352 - data = p.data; 353 - dma_ppa_list = p.dma_ppa_list; 354 - dma_meta_list = p.dma_meta_list; 366 + spin_lock(&line->lock); 367 + left_line_ppas = line->left_msecs; 368 + spin_unlock(&line->lock); 369 + 370 + pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL); 371 + if (!pad_rq) 372 + return -ENOMEM; 373 + 374 + data = vzalloc(pblk->max_write_pgs * geo->sec_size); 375 + if (!data) { 376 + ret = -ENOMEM; 377 + goto free_rq; 378 + } 379 + 380 + pad_rq->pblk = pblk; 381 + init_completion(&pad_rq->wait); 382 + 
kref_init(&pad_rq->ref); 355 383 356 384 next_pad_rq: 357 385 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 358 - if (!rq_ppas) 359 - rq_ppas = pblk->min_write_pgs; 386 + if (rq_ppas < pblk->min_write_pgs) { 387 + pr_err("pblk: corrupted pad line %d\n", line->id); 388 + goto free_rq; 389 + } 390 + 360 391 rq_len = rq_ppas * geo->sec_size; 361 392 393 + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); 394 + if (!meta_list) { 395 + ret = -ENOMEM; 396 + goto free_data; 397 + } 398 + 399 + ppa_list = (void *)(meta_list) + pblk_dma_meta_size; 400 + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; 401 + 402 + rqd = pblk_alloc_rqd(pblk, WRITE); 403 + if (IS_ERR(rqd)) { 404 + ret = PTR_ERR(rqd); 405 + goto fail_free_meta; 406 + } 407 + memset(rqd, 0, pblk_w_rq_size); 408 + 362 409 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); 363 - if (IS_ERR(bio)) 364 - return PTR_ERR(bio); 410 + if (IS_ERR(bio)) { 411 + ret = PTR_ERR(bio); 412 + goto fail_free_rqd; 413 + } 365 414 366 415 bio->bi_iter.bi_sector = 0; /* internal bio */ 367 416 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 368 - 369 - memset(rqd, 0, pblk_g_rq_size); 370 417 371 418 rqd->bio = bio; 372 419 rqd->opcode = NVM_OP_PWRITE; ··· 423 376 rqd->ppa_list = ppa_list; 424 377 rqd->dma_ppa_list = dma_ppa_list; 425 378 rqd->dma_meta_list = dma_meta_list; 426 - rqd->end_io = pblk_end_io_sync; 427 - rqd->private = &wait; 379 + rqd->end_io = pblk_end_io_recov; 380 + rqd->private = pad_rq; 428 381 429 382 for (i = 0; i < rqd->nr_ppas; ) { 430 383 struct ppa_addr ppa; ··· 452 405 } 453 406 } 454 407 408 + kref_get(&pad_rq->ref); 409 + 455 410 ret = pblk_submit_io(pblk, rqd); 456 411 if (ret) { 457 412 pr_err("pblk: I/O submission failed: %d\n", ret); 458 - return ret; 413 + goto free_data; 459 414 } 460 415 461 - if (!wait_for_completion_io_timeout(&wait, 462 - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 463 - pr_err("pblk: L2P recovery write timed out\n"); 464 - } 465 416 
atomic_dec(&pblk->inflight_io); 466 - reinit_completion(&wait); 467 417 468 418 left_line_ppas -= rq_ppas; 469 419 left_ppas -= rq_ppas; 470 - if (left_ppas > 0 && left_line_ppas) 420 + if (left_ppas && left_line_ppas) 471 421 goto next_pad_rq; 472 422 473 - return 0; 423 + kref_put(&pad_rq->ref, pblk_recov_complete); 424 + 425 + if (!wait_for_completion_io_timeout(&pad_rq->wait, 426 + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 427 + pr_err("pblk: pad write timed out\n"); 428 + ret = -ETIME; 429 + } 430 + 431 + free_rq: 432 + kfree(pad_rq); 433 + free_data: 434 + vfree(data); 435 + return ret; 436 + 437 + fail_free_rqd: 438 + pblk_free_rqd(pblk, rqd, WRITE); 439 + fail_free_meta: 440 + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); 441 + kfree(pad_rq); 442 + return ret; 474 443 } 475 444 476 445 /* When this function is called, it means that not all upper pages have been ··· 618 555 if (pad_secs > line->left_msecs) 619 556 pad_secs = line->left_msecs; 620 557 621 - ret = pblk_recov_pad_oob(pblk, line, p, pad_secs); 558 + ret = pblk_recov_pad_oob(pblk, line, pad_secs); 622 559 if (ret) 623 560 pr_err("pblk: OOB padding failed (err:%d)\n", ret); 624 561 ··· 1024 961 */ 1025 962 int pblk_recov_pad(struct pblk *pblk) 1026 963 { 1027 - struct nvm_tgt_dev *dev = pblk->dev; 1028 - struct nvm_geo *geo = &dev->geo; 1029 964 struct pblk_line *line; 1030 965 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1031 - struct nvm_rq *rqd; 1032 - struct pblk_recov_alloc p; 1033 - struct ppa_addr *ppa_list; 1034 - struct pblk_sec_meta *meta_list; 1035 - void *data; 1036 966 int left_msecs; 1037 967 int ret = 0; 1038 - dma_addr_t dma_ppa_list, dma_meta_list; 1039 968 1040 969 spin_lock(&l_mg->free_lock); 1041 970 line = l_mg->data_line; 1042 971 left_msecs = line->left_msecs; 1043 972 spin_unlock(&l_mg->free_lock); 1044 973 1045 - rqd = pblk_alloc_rqd(pblk, READ); 1046 - if (IS_ERR(rqd)) 1047 - return PTR_ERR(rqd); 1048 - 1049 - meta_list = nvm_dev_dma_alloc(dev->parent, 
GFP_KERNEL, &dma_meta_list); 1050 - if (!meta_list) { 1051 - ret = -ENOMEM; 1052 - goto free_rqd; 1053 - } 1054 - 1055 - ppa_list = (void *)(meta_list) + pblk_dma_meta_size; 1056 - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; 1057 - 1058 - data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); 1059 - if (!data) { 1060 - ret = -ENOMEM; 1061 - goto free_meta_list; 1062 - } 1063 - 1064 - p.ppa_list = ppa_list; 1065 - p.meta_list = meta_list; 1066 - p.rqd = rqd; 1067 - p.data = data; 1068 - p.dma_ppa_list = dma_ppa_list; 1069 - p.dma_meta_list = dma_meta_list; 1070 - 1071 - ret = pblk_recov_pad_oob(pblk, line, p, left_msecs); 974 + ret = pblk_recov_pad_oob(pblk, line, left_msecs); 1072 975 if (ret) { 1073 976 pr_err("pblk: Tear down padding failed (%d)\n", ret); 1074 - goto free_data; 977 + return ret; 1075 978 } 1076 979 1077 980 pblk_line_close_meta(pblk, line); 1078 - 1079 - free_data: 1080 - kfree(data); 1081 - free_meta_list: 1082 - nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); 1083 - free_rqd: 1084 - pblk_free_rqd(pblk, rqd, READ); 1085 - 1086 981 return ret; 1087 982 }
+1 -1
drivers/lightnvm/pblk-write.c
··· 190 190 191 191 if (rqd->error) { 192 192 pblk_log_write_err(pblk, rqd); 193 - pr_err("pblk: metadata I/O failed\n"); 193 + pr_err("pblk: metadata I/O failed. Line %d\n", line->id); 194 194 } 195 195 #ifdef CONFIG_NVM_DEBUG 196 196 else
+8
drivers/lightnvm/pblk.h
··· 111 111 void *private; 112 112 }; 113 113 114 + /* Pad context */ 115 + struct pblk_pad_rq { 116 + struct pblk *pblk; 117 + struct completion wait; 118 + struct kref ref; 119 + }; 120 + 114 121 /* Recovery context */ 115 122 struct pblk_rec_ctx { 116 123 struct pblk *pblk; ··· 681 674 unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb); 682 675 683 676 unsigned int pblk_rb_read_count(struct pblk_rb *rb); 677 + unsigned int pblk_rb_sync_count(struct pblk_rb *rb); 684 678 unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); 685 679 686 680 int pblk_rb_tear_down_check(struct pblk_rb *rb);