Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
pblk-read.c at v4.13-rc2 (542 lines, 14 kB)
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-read.c - pblk's read path
 */

#include "pblk.h"

/*
 * There is no guarantee that the value read from cache has not been updated
 * and now resides at another location in the cache. We do guarantee, though,
 * that if the value is read from the cache, it belongs to the mapped lba.
 * In order to guarantee that writes and reads are ordered, a flush must be
 * issued.
 */
static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
                                sector_t lba, struct ppa_addr ppa,
                                int bio_iter)
{
#ifdef CONFIG_NVM_DEBUG
        /* Callers must ensure that the ppa points to a cache address */
        BUG_ON(pblk_ppa_empty(ppa));
        BUG_ON(!pblk_addr_in_cache(ppa));
#endif

        return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
}

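/*
 * Build the ppa list for a multi-sector read. Sectors that can be served
 * from the write buffer (or that are unmapped) are marked in read_bitmap;
 * the remaining sectors are collected in rqd->ppa_list for a device read.
 */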
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
                                 unsigned long *read_bitmap)
{
        struct bio *bio = rqd->bio;
        struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
        sector_t blba = pblk_get_lba(bio);
        int nr_secs = rqd->nr_ppas;
        int advanced_bio = 0;
        int i, j = 0;

        /* logic error: lba out-of-bounds. Ignore read request */
        if (blba + nr_secs >= pblk->rl.nr_secs) {
                WARN(1, "pblk: read lbas out of bounds\n");
                return;
        }

        pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);

        for (i = 0; i < nr_secs; i++) {
                struct ppa_addr p = ppas[i];
                sector_t lba = blba + i;

retry:
                if (pblk_ppa_empty(p)) {
                        WARN_ON(test_and_set_bit(i, read_bitmap));
                        continue;
                }

                /* Try to read from write buffer. The address is later checked
                 * on the write buffer to prevent retrieving overwritten data.
                 */
                if (pblk_addr_in_cache(p)) {
                        if (!pblk_read_from_cache(pblk, bio, lba, p, i)) {
                                pblk_lookup_l2p_seq(pblk, &p, lba, 1);
                                goto retry;
                        }
                        WARN_ON(test_and_set_bit(i, read_bitmap));
                        advanced_bio = 1;
#ifdef CONFIG_NVM_DEBUG
                        atomic_long_inc(&pblk->cache_reads);
#endif
                } else {
                        /* Read from media non-cached sectors */
                        rqd->ppa_list[j++] = p;
                }

                if (advanced_bio)
                        bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
        }

        if (pblk_io_aligned(pblk, nr_secs))
                rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
        else
                rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);

#ifdef CONFIG_NVM_DEBUG
        atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
}

static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
{
        int err;

        err = pblk_submit_io(pblk, rqd);
        if (err)
                return NVM_IO_ERR;

        return NVM_IO_OK;
}

static void pblk_end_io_read(struct nvm_rq *rqd)
{
        struct pblk *pblk = rqd->private;
        struct nvm_tgt_dev *dev = pblk->dev;
        struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
        struct bio *bio = rqd->bio;

        if (rqd->error)
                pblk_log_read_err(pblk, rqd);
#ifdef CONFIG_NVM_DEBUG
        else
                WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
#endif

        nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);

        bio_put(bio);
        if (r_ctx->private) {
                struct bio *orig_bio = r_ctx->private;

#ifdef CONFIG_NVM_DEBUG
                WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
#endif
                bio_endio(orig_bio);
                bio_put(orig_bio);
        }

#ifdef CONFIG_NVM_DEBUG
        atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
        atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
#endif

        pblk_free_rqd(pblk, rqd, READ);
        atomic_dec(&pblk->inflight_io);
}

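/*
 * Complete a read that was only partially served from the write buffer:
 * allocate an internal bio for the remaining "holes", read them
 * synchronously from the device and copy the data back into the original
 * bio before completing it.
 */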
static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
                                      unsigned int bio_init_idx,
                                      unsigned long *read_bitmap)
{
        struct bio *new_bio, *bio = rqd->bio;
        struct bio_vec src_bv, dst_bv;
        void *ppa_ptr = NULL;
        void *src_p, *dst_p;
        dma_addr_t dma_ppa_list = 0;
        int nr_secs = rqd->nr_ppas;
        int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
        int i, ret, hole;
        DECLARE_COMPLETION_ONSTACK(wait);

        new_bio = bio_alloc(GFP_KERNEL, nr_holes);
        if (!new_bio) {
                pr_err("pblk: could not alloc read bio\n");
                return NVM_IO_ERR;
        }

        if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
                goto err;

        if (nr_holes != new_bio->bi_vcnt) {
                pr_err("pblk: malformed bio\n");
                goto err;
        }

        new_bio->bi_iter.bi_sector = 0; /* internal bio */
        bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
        new_bio->bi_private = &wait;
        new_bio->bi_end_io = pblk_end_bio_sync;

        rqd->bio = new_bio;
        rqd->nr_ppas = nr_holes;
        rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
        rqd->end_io = NULL;

        if (unlikely(nr_secs > 1 && nr_holes == 1)) {
                ppa_ptr = rqd->ppa_list;
                dma_ppa_list = rqd->dma_ppa_list;
                rqd->ppa_addr = rqd->ppa_list[0];
        }

        ret = pblk_submit_read_io(pblk, rqd);
        if (ret) {
                bio_put(rqd->bio);
                pr_err("pblk: read IO submission failed\n");
                goto err;
        }

        if (!wait_for_completion_io_timeout(&wait,
                                msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
                pr_err("pblk: partial read I/O timed out\n");
        }

        if (rqd->error) {
                atomic_long_inc(&pblk->read_failed);
#ifdef CONFIG_NVM_DEBUG
                pblk_print_failed_rqd(pblk, rqd, rqd->error);
#endif
        }

        if (unlikely(nr_secs > 1 && nr_holes == 1)) {
                rqd->ppa_list = ppa_ptr;
                rqd->dma_ppa_list = dma_ppa_list;
        }

        /* Fill the holes in the original bio */
        i = 0;
        hole = find_first_zero_bit(read_bitmap, nr_secs);
        do {
                src_bv = new_bio->bi_io_vec[i++];
                dst_bv = bio->bi_io_vec[bio_init_idx + hole];

                src_p = kmap_atomic(src_bv.bv_page);
                dst_p = kmap_atomic(dst_bv.bv_page);

                memcpy(dst_p + dst_bv.bv_offset,
                        src_p + src_bv.bv_offset,
                        PBLK_EXPOSED_PAGE_SIZE);

                kunmap_atomic(src_p);
                kunmap_atomic(dst_p);

                mempool_free(src_bv.bv_page, pblk->page_pool);

                hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
        } while (hole < nr_secs);

        bio_put(new_bio);

        /* Complete the original bio and associated request */
        rqd->bio = bio;
        rqd->nr_ppas = nr_secs;
        rqd->private = pblk;

        bio_endio(bio);
        pblk_end_io_read(rqd);
        return NVM_IO_OK;

err:
        /* Free allocated pages in new bio */
        pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
        rqd->private = pblk;
        pblk_end_io_read(rqd);
        return NVM_IO_ERR;
}

static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
                         unsigned long *read_bitmap)
{
        struct bio *bio = rqd->bio;
        struct ppa_addr ppa;
        sector_t lba = pblk_get_lba(bio);

        /* logic error: lba out-of-bounds. Ignore read request */
        if (lba >= pblk->rl.nr_secs) {
                WARN(1, "pblk: read lba out of bounds\n");
                return;
        }

        pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);

#ifdef CONFIG_NVM_DEBUG
        atomic_long_inc(&pblk->inflight_reads);
#endif

retry:
        if (pblk_ppa_empty(ppa)) {
                WARN_ON(test_and_set_bit(0, read_bitmap));
                return;
        }

        /* Try to read from write buffer. The address is later checked on the
         * write buffer to prevent retrieving overwritten data.
         */
        if (pblk_addr_in_cache(ppa)) {
                if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0)) {
                        pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
                        goto retry;
                }
                WARN_ON(test_and_set_bit(0, read_bitmap));
#ifdef CONFIG_NVM_DEBUG
                atomic_long_inc(&pblk->cache_reads);
#endif
        } else {
                rqd->ppa_addr = ppa;
        }

        rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
}

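/*
 * Main entry point for the read path. Depending on how much of the request
 * can be served from the write buffer, the request is either completed
 * immediately (fully cached), submitted to the device on a cloned bio
 * (nothing cached), or handled as a partial read.
 */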
int pblk_submit_read(struct pblk *pblk, struct bio *bio)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        unsigned int nr_secs = pblk_get_secs(bio);
        struct nvm_rq *rqd;
        unsigned long read_bitmap; /* Max 64 ppas per request */
        unsigned int bio_init_idx;
        int ret = NVM_IO_ERR;

        if (nr_secs > PBLK_MAX_REQ_ADDRS)
                return NVM_IO_ERR;

        bitmap_zero(&read_bitmap, nr_secs);

        rqd = pblk_alloc_rqd(pblk, READ);
        if (IS_ERR(rqd)) {
                pr_err_ratelimited("pblk: not able to alloc rqd");
                return NVM_IO_ERR;
        }

        rqd->opcode = NVM_OP_PREAD;
        rqd->bio = bio;
        rqd->nr_ppas = nr_secs;
        rqd->private = pblk;
        rqd->end_io = pblk_end_io_read;

        /* Save the index for this bio's start. This is needed in case
         * we need to fill a partial read.
         */
        bio_init_idx = pblk_get_bi_idx(bio);

        rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
                                           &rqd->dma_meta_list);
        if (!rqd->meta_list) {
                pr_err("pblk: not able to allocate ppa list\n");
                goto fail_rqd_free;
        }

        if (nr_secs > 1) {
                rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
                rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;

                pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
        } else {
                pblk_read_rq(pblk, rqd, &read_bitmap);
        }

        bio_get(bio);
        if (bitmap_full(&read_bitmap, nr_secs)) {
                bio_endio(bio);
                atomic_inc(&pblk->inflight_io);
                pblk_end_io_read(rqd);
                return NVM_IO_OK;
        }

        /* All sectors are to be read from the device */
        if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
                struct bio *int_bio = NULL;
                struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);

                /* Clone read bio to deal with read errors internally */
                int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
                if (!int_bio) {
                        pr_err("pblk: could not clone read bio\n");
                        return NVM_IO_ERR;
                }

                rqd->bio = int_bio;
                r_ctx->private = bio;

                ret = pblk_submit_read_io(pblk, rqd);
                if (ret) {
                        pr_err("pblk: read IO submission failed\n");
                        if (int_bio)
                                bio_put(int_bio);
                        return ret;
                }

                return NVM_IO_OK;
        }

        /* The read bio request could be partially filled by the write buffer,
         * but there are some holes that need to be read from the drive.
         */
        ret = pblk_fill_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
        if (ret) {
                pr_err("pblk: failed to perform partial read\n");
                return ret;
        }

        return NVM_IO_OK;

fail_rqd_free:
        pblk_free_rqd(pblk, rqd, READ);
        return ret;
}

static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
                              struct pblk_line *line, u64 *lba_list,
                              unsigned int nr_secs)
{
        struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
        int valid_secs = 0;
        int i;

        pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs);

        for (i = 0; i < nr_secs; i++) {
                if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id ||
                                                pblk_ppa_empty(ppas[i])) {
                        lba_list[i] = ADDR_EMPTY;
                        continue;
                }

                rqd->ppa_list[valid_secs++] = ppas[i];
        }

#ifdef CONFIG_NVM_DEBUG
        atomic_long_add(valid_secs, &pblk->inflight_reads);
#endif
        return valid_secs;
}

static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
                      struct pblk_line *line, sector_t lba)
{
        struct ppa_addr ppa;
        int valid_secs = 0;

        if (lba == ADDR_EMPTY)
                goto out;

        /* logic error: lba out-of-bounds */
        if (lba >= pblk->rl.nr_secs) {
                WARN(1, "pblk: read lba out of bounds\n");
                goto out;
        }

        spin_lock(&pblk->trans_lock);
        ppa = pblk_trans_map_get(pblk, lba);
        spin_unlock(&pblk->trans_lock);

        /* Ignore values updated in the meantime */
        if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id ||
                                                pblk_ppa_empty(ppa))
                goto out;

        rqd->ppa_addr = ppa;
        valid_secs = 1;

#ifdef CONFIG_NVM_DEBUG
        atomic_long_inc(&pblk->inflight_reads);
#endif

out:
        return valid_secs;
}

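/*
 * Synchronous read used by garbage collection. Only sectors that still map
 * to the line under GC are read; lbas that have been overwritten or moved
 * back to the write buffer in the meantime are skipped.
 */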
int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
                        unsigned int nr_secs, unsigned int *secs_to_gc,
                        struct pblk_line *line)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct bio *bio;
        struct nvm_rq rqd;
        int ret, data_len;
        DECLARE_COMPLETION_ONSTACK(wait);

        memset(&rqd, 0, sizeof(struct nvm_rq));

        rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
                                          &rqd.dma_meta_list);
        if (!rqd.meta_list)
                return NVM_IO_ERR;

        if (nr_secs > 1) {
                rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
                rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;

                *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
                                                 nr_secs);
                if (*secs_to_gc == 1)
                        rqd.ppa_addr = rqd.ppa_list[0];
        } else {
                *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
        }

        if (!(*secs_to_gc))
                goto out;

        data_len = (*secs_to_gc) * geo->sec_size;
        bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
                                PBLK_KMALLOC_META, GFP_KERNEL);
        if (IS_ERR(bio)) {
                pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
                goto err_free_dma;
        }

        bio->bi_iter.bi_sector = 0; /* internal bio */
        bio_set_op_attrs(bio, REQ_OP_READ, 0);

        rqd.opcode = NVM_OP_PREAD;
        rqd.end_io = pblk_end_io_sync;
        rqd.private = &wait;
        rqd.nr_ppas = *secs_to_gc;
        rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
        rqd.bio = bio;

        ret = pblk_submit_read_io(pblk, &rqd);
        if (ret) {
                bio_endio(bio);
                pr_err("pblk: GC read request failed\n");
                goto err_free_dma;
        }

        if (!wait_for_completion_io_timeout(&wait,
                                msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
                pr_err("pblk: GC read I/O timed out\n");
        }
        atomic_dec(&pblk->inflight_io);

        if (rqd.error) {
                atomic_long_inc(&pblk->read_failed_gc);
#ifdef CONFIG_NVM_DEBUG
                pblk_print_failed_rqd(pblk, &rqd, rqd.error);
#endif
        }

#ifdef CONFIG_NVM_DEBUG
        atomic_long_add(*secs_to_gc, &pblk->sync_reads);
        atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads);
        atomic_long_sub(*secs_to_gc, &pblk->inflight_reads);
#endif

out:
        nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
        return NVM_IO_OK;

err_free_dma:
        nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
        return NVM_IO_ERR;
}