Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v4.17-rc2 612 lines 15 kB view raw
1/* 2 * Copyright (C) 2016 CNEX Labs 3 * Initial release: Javier Gonzalez <javier@cnexlabs.com> 4 * Matias Bjorling <matias@cnexlabs.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License version 8 * 2 as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * pblk-read.c - pblk's read path 16 */ 17 18#include "pblk.h" 19 20/* 21 * There is no guarantee that the value read from cache has not been updated and 22 * resides at another location in the cache. We guarantee though that if the 23 * value is read from the cache, it belongs to the mapped lba. In order to 24 * guarantee and order between writes and reads are ordered, a flush must be 25 * issued. 26 */ 27static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, 28 sector_t lba, struct ppa_addr ppa, 29 int bio_iter, bool advanced_bio) 30{ 31#ifdef CONFIG_NVM_DEBUG 32 /* Callers must ensure that the ppa points to a cache address */ 33 BUG_ON(pblk_ppa_empty(ppa)); 34 BUG_ON(!pblk_addr_in_cache(ppa)); 35#endif 36 37 return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, 38 bio_iter, advanced_bio); 39} 40 41static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, 42 sector_t blba, unsigned long *read_bitmap) 43{ 44 struct pblk_sec_meta *meta_list = rqd->meta_list; 45 struct bio *bio = rqd->bio; 46 struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; 47 int nr_secs = rqd->nr_ppas; 48 bool advanced_bio = false; 49 int i, j = 0; 50 51 pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs); 52 53 for (i = 0; i < nr_secs; i++) { 54 struct ppa_addr p = ppas[i]; 55 sector_t lba = blba + i; 56 57retry: 58 if (pblk_ppa_empty(p)) { 59 WARN_ON(test_and_set_bit(i, read_bitmap)); 60 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); 61 62 if (unlikely(!advanced_bio)) { 63 bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE); 64 advanced_bio = true; 65 } 66 67 goto next; 68 } 69 70 /* Try to read from write buffer. The address is later checked 71 * on the write buffer to prevent retrieving overwritten data. 72 */ 73 if (pblk_addr_in_cache(p)) { 74 if (!pblk_read_from_cache(pblk, bio, lba, p, i, 75 advanced_bio)) { 76 pblk_lookup_l2p_seq(pblk, &p, lba, 1); 77 goto retry; 78 } 79 WARN_ON(test_and_set_bit(i, read_bitmap)); 80 meta_list[i].lba = cpu_to_le64(lba); 81 advanced_bio = true; 82#ifdef CONFIG_NVM_DEBUG 83 atomic_long_inc(&pblk->cache_reads); 84#endif 85 } else { 86 /* Read from media non-cached sectors */ 87 rqd->ppa_list[j++] = p; 88 } 89 90next: 91 if (advanced_bio) 92 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); 93 } 94 95 if (pblk_io_aligned(pblk, nr_secs)) 96 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); 97 else 98 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); 99 100#ifdef CONFIG_NVM_DEBUG 101 atomic_long_add(nr_secs, &pblk->inflight_reads); 102#endif 103} 104 105static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd) 106{ 107 int err; 108 109 err = pblk_submit_io(pblk, rqd); 110 if (err) 111 return NVM_IO_ERR; 112 113 return NVM_IO_OK; 114} 115 116static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd, 117 sector_t blba) 118{ 119 struct pblk_sec_meta *meta_list = rqd->meta_list; 120 int nr_lbas = rqd->nr_ppas; 121 int i; 122 123 for (i = 0; i < nr_lbas; i++) { 124 u64 lba = le64_to_cpu(meta_list[i].lba); 125 126 if (lba == ADDR_EMPTY) 127 continue; 128 129 WARN(lba != blba + i, "pblk: corrupted read LBA\n"); 130 } 131} 132 133static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) 134{ 135 struct ppa_addr *ppa_list; 136 int i; 137 138 ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; 139 140 for (i = 0; i < rqd->nr_ppas; i++) { 141 struct ppa_addr ppa = ppa_list[i]; 142 struct pblk_line *line; 143 144 line = &pblk->lines[pblk_ppa_to_line(ppa)]; 145 kref_put(&line->ref, pblk_line_put_wq); 146 } 147} 148 149static void pblk_end_user_read(struct bio *bio) 150{ 151#ifdef CONFIG_NVM_DEBUG 152 WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n"); 153#endif 154 bio_endio(bio); 155 bio_put(bio); 156} 157 158static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, 159 bool put_line) 160{ 161 struct nvm_tgt_dev *dev = pblk->dev; 162 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); 163 struct bio *bio = rqd->bio; 164 unsigned long start_time = r_ctx->start_time; 165 166 generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time); 167 168 if (rqd->error) 169 pblk_log_read_err(pblk, rqd); 170#ifdef CONFIG_NVM_DEBUG 171 else 172 WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n"); 173#endif 174 175 pblk_read_check(pblk, rqd, r_ctx->lba); 176 177 bio_put(bio); 178 if (r_ctx->private) 179 pblk_end_user_read((struct bio *)r_ctx->private); 180 181 if (put_line) 182 pblk_read_put_rqd_kref(pblk, rqd); 183 184#ifdef CONFIG_NVM_DEBUG 185 atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); 186 atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads); 187#endif 188 189 pblk_free_rqd(pblk, rqd, PBLK_READ); 190 atomic_dec(&pblk->inflight_io); 191} 192 193static void pblk_end_io_read(struct nvm_rq *rqd) 194{ 195 struct pblk *pblk = rqd->private; 196 197 __pblk_end_io_read(pblk, rqd, true); 198} 199 200static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, 201 unsigned int bio_init_idx, 202 unsigned long *read_bitmap) 203{ 204 struct bio *new_bio, *bio = rqd->bio; 205 struct pblk_sec_meta *meta_list = rqd->meta_list; 206 struct bio_vec src_bv, dst_bv; 207 void *ppa_ptr = NULL; 208 void *src_p, *dst_p; 209 dma_addr_t dma_ppa_list = 0; 210 __le64 *lba_list_mem, *lba_list_media; 211 int nr_secs = rqd->nr_ppas; 212 int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); 213 int i, ret, hole; 214 215 /* Re-use allocated memory for intermediate lbas */ 216 lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size); 217 lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size); 218 219 new_bio = bio_alloc(GFP_KERNEL, nr_holes); 220 221 if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) 222 goto err; 223 224 if (nr_holes != new_bio->bi_vcnt) { 225 pr_err("pblk: malformed bio\n"); 226 goto err; 227 } 228 229 for (i = 0; i < nr_secs; i++) 230 lba_list_mem[i] = meta_list[i].lba; 231 232 new_bio->bi_iter.bi_sector = 0; /* internal bio */ 233 bio_set_op_attrs(new_bio, REQ_OP_READ, 0); 234 235 rqd->bio = new_bio; 236 rqd->nr_ppas = nr_holes; 237 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); 238 239 if (unlikely(nr_holes == 1)) { 240 ppa_ptr = rqd->ppa_list; 241 dma_ppa_list = rqd->dma_ppa_list; 242 rqd->ppa_addr = rqd->ppa_list[0]; 243 } 244 245 ret = pblk_submit_io_sync(pblk, rqd); 246 if (ret) { 247 bio_put(rqd->bio); 248 pr_err("pblk: sync read IO submission failed\n"); 249 goto err; 250 } 251 252 if (rqd->error) { 253 atomic_long_inc(&pblk->read_failed); 254#ifdef CONFIG_NVM_DEBUG 255 pblk_print_failed_rqd(pblk, rqd, rqd->error); 256#endif 257 } 258 259 if (unlikely(nr_holes == 1)) { 260 struct ppa_addr ppa; 261 262 ppa = rqd->ppa_addr; 263 rqd->ppa_list = ppa_ptr; 264 rqd->dma_ppa_list = dma_ppa_list; 265 rqd->ppa_list[0] = ppa; 266 } 267 268 for (i = 0; i < nr_secs; i++) { 269 lba_list_media[i] = meta_list[i].lba; 270 meta_list[i].lba = lba_list_mem[i]; 271 } 272 273 /* Fill the holes in the original bio */ 274 i = 0; 275 hole = find_first_zero_bit(read_bitmap, nr_secs); 276 do { 277 int line_id = pblk_ppa_to_line(rqd->ppa_list[i]); 278 struct pblk_line *line = &pblk->lines[line_id]; 279 280 kref_put(&line->ref, pblk_line_put); 281 282 meta_list[hole].lba = lba_list_media[i]; 283 284 src_bv = new_bio->bi_io_vec[i++]; 285 dst_bv = bio->bi_io_vec[bio_init_idx + hole]; 286 287 src_p = kmap_atomic(src_bv.bv_page); 288 dst_p = kmap_atomic(dst_bv.bv_page); 289 290 memcpy(dst_p + dst_bv.bv_offset, 291 src_p + src_bv.bv_offset, 292 PBLK_EXPOSED_PAGE_SIZE); 293 294 kunmap_atomic(src_p); 295 kunmap_atomic(dst_p); 296 297 mempool_free(src_bv.bv_page, pblk->page_bio_pool); 298 299 hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); 300 } while (hole < nr_secs); 301 302 bio_put(new_bio); 303 304 /* Complete the original bio and associated request */ 305 bio_endio(bio); 306 rqd->bio = bio; 307 rqd->nr_ppas = nr_secs; 308 309 __pblk_end_io_read(pblk, rqd, false); 310 return NVM_IO_OK; 311 312err: 313 pr_err("pblk: failed to perform partial read\n"); 314 315 /* Free allocated pages in new bio */ 316 pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); 317 __pblk_end_io_read(pblk, rqd, false); 318 return NVM_IO_ERR; 319} 320 321static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, 322 sector_t lba, unsigned long *read_bitmap) 323{ 324 struct pblk_sec_meta *meta_list = rqd->meta_list; 325 struct bio *bio = rqd->bio; 326 struct ppa_addr ppa; 327 328 pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); 329 330#ifdef CONFIG_NVM_DEBUG 331 atomic_long_inc(&pblk->inflight_reads); 332#endif 333 334retry: 335 if (pblk_ppa_empty(ppa)) { 336 WARN_ON(test_and_set_bit(0, read_bitmap)); 337 meta_list[0].lba = cpu_to_le64(ADDR_EMPTY); 338 return; 339 } 340 341 /* Try to read from write buffer. The address is later checked on the 342 * write buffer to prevent retrieving overwritten data. 343 */ 344 if (pblk_addr_in_cache(ppa)) { 345 if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0, 1)) { 346 pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); 347 goto retry; 348 } 349 350 WARN_ON(test_and_set_bit(0, read_bitmap)); 351 meta_list[0].lba = cpu_to_le64(lba); 352 353#ifdef CONFIG_NVM_DEBUG 354 atomic_long_inc(&pblk->cache_reads); 355#endif 356 } else { 357 rqd->ppa_addr = ppa; 358 } 359 360 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); 361} 362 363int pblk_submit_read(struct pblk *pblk, struct bio *bio) 364{ 365 struct nvm_tgt_dev *dev = pblk->dev; 366 struct request_queue *q = dev->q; 367 sector_t blba = pblk_get_lba(bio); 368 unsigned int nr_secs = pblk_get_secs(bio); 369 struct pblk_g_ctx *r_ctx; 370 struct nvm_rq *rqd; 371 unsigned int bio_init_idx; 372 unsigned long read_bitmap; /* Max 64 ppas per request */ 373 int ret = NVM_IO_ERR; 374 375 /* logic error: lba out-of-bounds. Ignore read request */ 376 if (blba >= pblk->rl.nr_secs || nr_secs > PBLK_MAX_REQ_ADDRS) { 377 WARN(1, "pblk: read lba out of bounds (lba:%llu, nr:%d)\n", 378 (unsigned long long)blba, nr_secs); 379 return NVM_IO_ERR; 380 } 381 382 generic_start_io_acct(q, READ, bio_sectors(bio), &pblk->disk->part0); 383 384 bitmap_zero(&read_bitmap, nr_secs); 385 386 rqd = pblk_alloc_rqd(pblk, PBLK_READ); 387 388 rqd->opcode = NVM_OP_PREAD; 389 rqd->bio = bio; 390 rqd->nr_ppas = nr_secs; 391 rqd->private = pblk; 392 rqd->end_io = pblk_end_io_read; 393 394 r_ctx = nvm_rq_to_pdu(rqd); 395 r_ctx->start_time = jiffies; 396 r_ctx->lba = blba; 397 398 /* Save the index for this bio's start. This is needed in case 399 * we need to fill a partial read. 400 */ 401 bio_init_idx = pblk_get_bi_idx(bio); 402 403 rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 404 &rqd->dma_meta_list); 405 if (!rqd->meta_list) { 406 pr_err("pblk: not able to allocate ppa list\n"); 407 goto fail_rqd_free; 408 } 409 410 if (nr_secs > 1) { 411 rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; 412 rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; 413 414 pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap); 415 } else { 416 pblk_read_rq(pblk, rqd, blba, &read_bitmap); 417 } 418 419 bio_get(bio); 420 if (bitmap_full(&read_bitmap, nr_secs)) { 421 bio_endio(bio); 422 atomic_inc(&pblk->inflight_io); 423 __pblk_end_io_read(pblk, rqd, false); 424 return NVM_IO_OK; 425 } 426 427 /* All sectors are to be read from the device */ 428 if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) { 429 struct bio *int_bio = NULL; 430 431 /* Clone read bio to deal with read errors internally */ 432 int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set); 433 if (!int_bio) { 434 pr_err("pblk: could not clone read bio\n"); 435 goto fail_end_io; 436 } 437 438 rqd->bio = int_bio; 439 r_ctx->private = bio; 440 441 ret = pblk_submit_read_io(pblk, rqd); 442 if (ret) { 443 pr_err("pblk: read IO submission failed\n"); 444 if (int_bio) 445 bio_put(int_bio); 446 goto fail_end_io; 447 } 448 449 return NVM_IO_OK; 450 } 451 452 /* The read bio request could be partially filled by the write buffer, 453 * but there are some holes that need to be read from the drive. 454 */ 455 return pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap); 456 457fail_rqd_free: 458 pblk_free_rqd(pblk, rqd, PBLK_READ); 459 return ret; 460fail_end_io: 461 __pblk_end_io_read(pblk, rqd, false); 462 return ret; 463} 464 465static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, 466 struct pblk_line *line, u64 *lba_list, 467 u64 *paddr_list_gc, unsigned int nr_secs) 468{ 469 struct ppa_addr ppa_list_l2p[PBLK_MAX_REQ_ADDRS]; 470 struct ppa_addr ppa_gc; 471 int valid_secs = 0; 472 int i; 473 474 pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs); 475 476 for (i = 0; i < nr_secs; i++) { 477 if (lba_list[i] == ADDR_EMPTY) 478 continue; 479 480 ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id); 481 if (!pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) { 482 paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY; 483 continue; 484 } 485 486 rqd->ppa_list[valid_secs++] = ppa_list_l2p[i]; 487 } 488 489#ifdef CONFIG_NVM_DEBUG 490 atomic_long_add(valid_secs, &pblk->inflight_reads); 491#endif 492 493 return valid_secs; 494} 495 496static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, 497 struct pblk_line *line, sector_t lba, 498 u64 paddr_gc) 499{ 500 struct ppa_addr ppa_l2p, ppa_gc; 501 int valid_secs = 0; 502 503 if (lba == ADDR_EMPTY) 504 goto out; 505 506 /* logic error: lba out-of-bounds */ 507 if (lba >= pblk->rl.nr_secs) { 508 WARN(1, "pblk: read lba out of bounds\n"); 509 goto out; 510 } 511 512 spin_lock(&pblk->trans_lock); 513 ppa_l2p = pblk_trans_map_get(pblk, lba); 514 spin_unlock(&pblk->trans_lock); 515 516 ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id); 517 if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) 518 goto out; 519 520 rqd->ppa_addr = ppa_l2p; 521 valid_secs = 1; 522 523#ifdef CONFIG_NVM_DEBUG 524 atomic_long_inc(&pblk->inflight_reads); 525#endif 526 527out: 528 return valid_secs; 529} 530 531int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) 532{ 533 struct nvm_tgt_dev *dev = pblk->dev; 534 struct nvm_geo *geo = &dev->geo; 535 struct bio *bio; 536 struct nvm_rq rqd; 537 int data_len; 538 int ret = NVM_IO_OK; 539 540 memset(&rqd, 0, sizeof(struct nvm_rq)); 541 542 rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 543 &rqd.dma_meta_list); 544 if (!rqd.meta_list) 545 return -ENOMEM; 546 547 if (gc_rq->nr_secs > 1) { 548 rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; 549 rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; 550 551 gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line, 552 gc_rq->lba_list, 553 gc_rq->paddr_list, 554 gc_rq->nr_secs); 555 if (gc_rq->secs_to_gc == 1) 556 rqd.ppa_addr = rqd.ppa_list[0]; 557 } else { 558 gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line, 559 gc_rq->lba_list[0], 560 gc_rq->paddr_list[0]); 561 } 562 563 if (!(gc_rq->secs_to_gc)) 564 goto out; 565 566 data_len = (gc_rq->secs_to_gc) * geo->csecs; 567 bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len, 568 PBLK_VMALLOC_META, GFP_KERNEL); 569 if (IS_ERR(bio)) { 570 pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio)); 571 goto err_free_dma; 572 } 573 574 bio->bi_iter.bi_sector = 0; /* internal bio */ 575 bio_set_op_attrs(bio, REQ_OP_READ, 0); 576 577 rqd.opcode = NVM_OP_PREAD; 578 rqd.nr_ppas = gc_rq->secs_to_gc; 579 rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); 580 rqd.bio = bio; 581 582 if (pblk_submit_io_sync(pblk, &rqd)) { 583 ret = -EIO; 584 pr_err("pblk: GC read request failed\n"); 585 goto err_free_bio; 586 } 587 588 atomic_dec(&pblk->inflight_io); 589 590 if (rqd.error) { 591 atomic_long_inc(&pblk->read_failed_gc); 592#ifdef CONFIG_NVM_DEBUG 593 pblk_print_failed_rqd(pblk, &rqd, rqd.error); 594#endif 595 } 596 597#ifdef CONFIG_NVM_DEBUG 598 atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads); 599 atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads); 600 atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads); 601#endif 602 603out: 604 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); 605 return ret; 606 607err_free_bio: 608 bio_put(bio); 609err_free_dma: 610 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); 611 return ret; 612}