Merge tag 'block-5.6-2020-02-05' of git://git.kernel.dk/linux-block

tjh.dev / kernel

fork atom

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork atom

Merge tag 'block-5.6-2020-02-05' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:
"Some later arrivals, but all fixes at this point:

- bcache fix series (Coly)

- Series of BFQ fixes (Paolo)

- NVMe pull request from Keith with a few minor NVMe fixes

- Various little tweaks"

* tag 'block-5.6-2020-02-05' of git://git.kernel.dk/linux-block: (23 commits)
nvmet: update AEN list and array at one place
nvmet: Fix controller use after free
nvmet: Fix error print message at nvmet_install_queue function
brd: check and limit max_part par
nvme-pci: remove nvmeq->tags
nvmet: fix dsm failure when payload does not match sgl descriptor
nvmet: Pass lockdep expression to RCU lists
block, bfq: clarify the goal of bfq_split_bfqq()
block, bfq: get a ref to a group when adding it to a service tree
block, bfq: remove ifdefs from around gets/puts of bfq groups
block, bfq: extend incomplete name of field on_st
block, bfq: get extra ref to prevent a queue from being freed during a group move
block, bfq: do not insert oom queue into position tree
block, bfq: do not plug I/O for bfq_queues with no proc refs
bcache: check return value of prio_read()
bcache: fix incorrect data type usage in btree_flush_write()
bcache: add readahead cache policy options via sysfs interface
bcache: explicity type cast in bset_bkey_last()
bcache: fix memory corruption in bch_cache_accounting_clear()
xen/blkfront: limit allocated memory size to actual use case
...

Linus Torvalds 6 years ago ed535f2c 03840663

+230 -94

25 changed files

expand all collapse all

MAINTAINERS

block

bfq-cgroup.c

bfq-iosched.c

bfq-iosched.h

bfq-wf2q.c

drivers

block

brd.c

drbd

drbd_int.h

drbd_nl.c

drbd_receiver.c

drbd_worker.c

nbd.c

xen-blkfront.c

bcache

bcache.h

bset.h

journal.c

request.c

stats.c

super.c

sysfs.c

nvme

host

pci.c

target

core.c

fabrics-cmd.c

io-cmd-bdev.c

io-cmd-file.c

nvmet.h

+1 -1

MAINTAINERS

reviewed

··· 14947 14947 F: drivers/mmc/host/sdhci-omap.c 14948 14948 14949 14949 SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER 14950 14950 - M: Scott Bauer <scott.bauer@intel.com> 14951 14950 M: Jonathan Derrick <jonathan.derrick@intel.com> 14951 14951 + M: Revanth Rajashekar <revanth.rajashekar@intel.com> 14952 14952 L: linux-block@vger.kernel.org 14953 14953 S: Supported 14954 14954 F: block/sed*

+14 -2

block/bfq-cgroup.c

reviewed

··· 332 332 kfree(bfqg); 333 333 } 334 334 335 335 - static void bfqg_and_blkg_get(struct bfq_group *bfqg) 335 335 + void bfqg_and_blkg_get(struct bfq_group *bfqg) 336 336 { 337 337 /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ 338 338 bfqg_get(bfqg); ··· 651 651 bfq_bfqq_expire(bfqd, bfqd->in_service_queue, 652 652 false, BFQQE_PREEMPTED); 653 653 654 654 + /* 655 655 + * get extra reference to prevent bfqq from being freed in 656 656 + * next possible deactivate 657 657 + */ 658 658 + bfqq->ref++; 659 659 + 654 660 if (bfq_bfqq_busy(bfqq)) 655 661 bfq_deactivate_bfqq(bfqd, bfqq, false, false); 656 656 - else if (entity->on_st) 662 662 + else if (entity->on_st_or_in_serv) 657 663 bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); 658 664 bfqg_and_blkg_put(bfqq_group(bfqq)); 659 665 ··· 676 670 677 671 if (!bfqd->in_service_queue && !bfqd->rq_in_driver) 678 672 bfq_schedule_dispatch(bfqd); 673 673 + /* release extra ref taken above */ 674 674 + bfq_put_queue(bfqq); 679 675 } 680 676 681 677 /** ··· 1405 1397 { 1406 1398 return bfqq->bfqd->root_group; 1407 1399 } 1400 1400 + 1401 1401 + void bfqg_and_blkg_get(struct bfq_group *bfqg) {} 1402 1402 + 1403 1403 + void bfqg_and_blkg_put(struct bfq_group *bfqg) {} 1408 1404 1409 1405 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) 1410 1406 {

+20 -6

block/bfq-iosched.c

reviewed

··· 613 613 bfqq->pos_root = NULL; 614 614 } 615 615 616 616 + /* oom_bfqq does not participate in queue merging */ 617 617 + if (bfqq == &bfqd->oom_bfqq) 618 618 + return; 619 619 + 616 620 /* 617 621 * bfqq cannot be merged any longer (see comments in 618 622 * bfq_setup_cooperator): no point in adding bfqq into the ··· 1059 1055 1060 1056 static int bfqq_process_refs(struct bfq_queue *bfqq) 1061 1057 { 1062 1062 - return bfqq->ref - bfqq->allocated - bfqq->entity.on_st - 1058 1058 + return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv - 1063 1059 (bfqq->weight_counter != NULL); 1064 1060 } 1065 1061 ··· 3447 3443 static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, 3448 3444 struct bfq_queue *bfqq) 3449 3445 { 3446 3446 + /* No point in idling for bfqq if it won't get requests any longer */ 3447 3447 + if (unlikely(!bfqq_process_refs(bfqq))) 3448 3448 + return false; 3449 3449 + 3450 3450 return (bfqq->wr_coeff > 1 && 3451 3451 (bfqd->wr_busy_queues < 3452 3452 bfq_tot_busy_queues(bfqd) || ··· 4084 4076 bfqq_sequential_and_IO_bound, 4085 4077 idling_boosts_thr; 4086 4078 4079 4079 + /* No point in idling for bfqq if it won't get requests any longer */ 4080 4080 + if (unlikely(!bfqq_process_refs(bfqq))) 4081 4081 + return false; 4082 4082 + 4087 4083 bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) && 4088 4084 bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq); 4089 4085 ··· 4180 4168 { 4181 4169 struct bfq_data *bfqd = bfqq->bfqd; 4182 4170 bool idling_boosts_thr_with_no_issue, idling_needed_for_service_guar; 4171 4171 + 4172 4172 + /* No point in idling for bfqq if it won't get requests any longer */ 4173 4173 + if (unlikely(!bfqq_process_refs(bfqq))) 4174 4174 + return false; 4183 4175 4184 4176 if (unlikely(bfqd->strict_guarantees)) 4185 4177 return true; ··· 4825 4809 { 4826 4810 struct bfq_queue *item; 4827 4811 struct hlist_node *n; 4828 4828 - #ifdef CONFIG_BFQ_GROUP_IOSCHED 4829 4812 struct bfq_group *bfqg = 
bfqq_group(bfqq); 4830 4830 - #endif 4831 4813 4832 4814 if (bfqq->bfqd) 4833 4815 bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", ··· 4898 4884 bfqq->bfqd->last_completed_rq_bfqq = NULL; 4899 4885 4900 4886 kmem_cache_free(bfq_pool, bfqq); 4901 4901 - #ifdef CONFIG_BFQ_GROUP_IOSCHED 4902 4887 bfqg_and_blkg_put(bfqg); 4903 4903 - #endif 4904 4888 } 4905 4889 4906 4890 static void bfq_put_cooperator(struct bfq_queue *bfqq) ··· 5979 5967 } 5980 5968 5981 5969 /* 5970 5970 + * Removes the association between the current task and bfqq, assuming 5971 5971 + * that bic points to the bfq iocontext of the task. 5982 5972 * Returns NULL if a new bfqq should be allocated, or the old bfqq if this 5983 5973 * was the last process referring to that bfqq. 5984 5974 */ ··· 6388 6374 6389 6375 hrtimer_cancel(&bfqd->idle_slice_timer); 6390 6376 6391 6391 - #ifdef CONFIG_BFQ_GROUP_IOSCHED 6392 6377 /* release oom-queue reference to root group */ 6393 6378 bfqg_and_blkg_put(bfqd->root_group); 6394 6379 6380 6380 + #ifdef CONFIG_BFQ_GROUP_IOSCHED 6395 6381 blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); 6396 6382 #else 6397 6383 spin_lock_irq(&bfqd->lock);

+3 -1

block/bfq-iosched.h

reviewed

··· 150 150 * Flag, true if the entity is on a tree (either the active or 151 151 * the idle one of its service_tree) or is in service. 152 152 */ 153 153 - bool on_st; 153 153 + bool on_st_or_in_serv; 154 154 155 155 /* B-WF2Q+ start and finish timestamps [sectors/weight] */ 156 156 u64 start, finish; ··· 921 921 922 922 #else 923 923 struct bfq_group { 924 924 + struct bfq_entity entity; 924 925 struct bfq_sched_data sched_data; 925 926 926 927 struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; ··· 985 984 struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); 986 985 struct bfq_group *bfqq_group(struct bfq_queue *bfqq); 987 986 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); 987 987 + void bfqg_and_blkg_get(struct bfq_group *bfqg); 988 988 void bfqg_and_blkg_put(struct bfq_group *bfqg); 989 989 990 990 #ifdef CONFIG_BFQ_GROUP_IOSCHED

+17 -6

block/bfq-wf2q.c

reviewed

··· 533 533 bfqq->ref++; 534 534 bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", 535 535 bfqq, bfqq->ref); 536 536 - } 536 536 + } else 537 537 + bfqg_and_blkg_get(container_of(entity, struct bfq_group, 538 538 + entity)); 537 539 } 538 540 539 541 /** ··· 647 645 { 648 646 struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); 649 647 650 650 - entity->on_st = false; 648 648 + entity->on_st_or_in_serv = false; 651 649 st->wsum -= entity->weight; 652 652 - if (bfqq && !is_in_service) 650 650 + if (is_in_service) 651 651 + return; 652 652 + 653 653 + if (bfqq) 653 654 bfq_put_queue(bfqq); 655 655 + else 656 656 + bfqg_and_blkg_put(container_of(entity, struct bfq_group, 657 657 + entity)); 654 658 } 655 659 656 660 /** ··· 1007 999 */ 1008 1000 bfq_get_entity(entity); 1009 1001 1010 1010 - entity->on_st = true; 1002 1002 + entity->on_st_or_in_serv = true; 1011 1003 } 1012 1004 1013 1005 #ifdef CONFIG_BFQ_GROUP_IOSCHED ··· 1173 1165 struct bfq_service_tree *st; 1174 1166 bool is_in_service; 1175 1167 1176 1176 - if (!entity->on_st) /* entity never activated, or already inactive */ 1168 1168 + if (!entity->on_st_or_in_serv) /* 1169 1169 + * entity never activated, or 1170 1170 + * already inactive 1171 1171 + */ 1177 1172 return false; 1178 1173 1179 1174 /* ··· 1631 1620 * service tree either, then release the service reference to 1632 1621 * the queue it represents (taken with bfq_get_entity). 1633 1622 */ 1634 1634 - if (!in_serv_entity->on_st) { 1623 1623 + if (!in_serv_entity->on_st_or_in_serv) { 1635 1624 /* 1636 1625 * If no process is referencing in_serv_bfqq any 1637 1626 * longer, then the service reference may be the only

+20 -2

drivers/block/brd.c

reviewed

··· 473 473 return kobj; 474 474 } 475 475 476 476 + static inline void brd_check_and_reset_par(void) 477 477 + { 478 478 + if (unlikely(!max_part)) 479 479 + max_part = 1; 480 480 + 481 481 + /* 482 482 + * make sure (1U << MINORBITS) is an exact multiple of 'max_part', 483 483 + * otherwise, it is possible to get the same dev_t when adding partitions. 484 484 + */ 485 485 + if ((1U << MINORBITS) % max_part != 0) 486 486 + max_part = 1UL << fls(max_part); 487 487 + 488 488 + if (max_part > DISK_MAX_PARTS) { 489 489 + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", 490 490 + DISK_MAX_PARTS, DISK_MAX_PARTS); 491 491 + max_part = DISK_MAX_PARTS; 492 492 + } 493 493 + } 494 494 + 476 495 static int __init brd_init(void) 477 496 { 478 497 struct brd_device *brd, *next; ··· 515 496 if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) 516 497 return -EIO; 517 498 518 518 - if (unlikely(!max_part)) 519 519 - max_part = 1; 499 499 + brd_check_and_reset_par(); 520 500 521 501 for (i = 0; i < rd_nr; i++) { 522 502 brd = brd_alloc(i);

+1 -1

drivers/block/drbd/drbd_int.h

reviewed

··· 622 622 int total; /* sum of all values */ 623 623 int values[0]; 624 624 }; 625 625 - extern struct fifo_buffer *fifo_alloc(int fifo_size); 625 625 + extern struct fifo_buffer *fifo_alloc(unsigned int fifo_size); 626 626 627 627 /* flag bits per connection */ 628 628 enum {

+2 -1

drivers/block/drbd/drbd_nl.c

reviewed

··· 1575 1575 struct drbd_device *device; 1576 1576 struct disk_conf *new_disk_conf, *old_disk_conf; 1577 1577 struct fifo_buffer *old_plan = NULL, *new_plan = NULL; 1578 1578 - int err, fifo_size; 1578 1578 + int err; 1579 1579 + unsigned int fifo_size; 1579 1580 1580 1581 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); 1581 1582 if (!adm_ctx.reply_skb)

+1 -1

drivers/block/drbd/drbd_receiver.c

reviewed

··· 3887 3887 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; 3888 3888 const int apv = connection->agreed_pro_version; 3889 3889 struct fifo_buffer *old_plan = NULL, *new_plan = NULL; 3890 3890 - int fifo_size = 0; 3890 3890 + unsigned int fifo_size = 0; 3891 3891 int err; 3892 3892 3893 3893 peer_device = conn_peer_device(connection, pi->vnr);

+2 -2

drivers/block/drbd/drbd_worker.c

reviewed

··· 482 482 fb->values[i] += value; 483 483 } 484 484 485 485 - struct fifo_buffer *fifo_alloc(int fifo_size) 485 485 + struct fifo_buffer *fifo_alloc(unsigned int fifo_size) 486 486 { 487 487 struct fifo_buffer *fb; 488 488 489 489 - fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO); 489 489 + fb = kzalloc(struct_size(fb, values, fifo_size), GFP_NOIO); 490 490 if (!fb) 491 491 return NULL; 492 492

+10

drivers/block/nbd.c

reviewed

··· 1265 1265 args = kzalloc(sizeof(*args), GFP_KERNEL); 1266 1266 if (!args) { 1267 1267 sock_shutdown(nbd); 1268 1268 + /* 1269 1269 + * If num_connections is m (2 < m), 1270 1270 + * and the NO.1 ~ NO.n (1 < n < m) kzallocs succeeded 1271 1271 + * but NO.(n + 1) failed, we still have n recv threads. 1272 1272 + * So, flush the workqueue here to prevent recv threads 1273 1273 + * from dropping the last config_refs and trying to destroy 1274 1274 + * the workqueue from inside the workqueue. 1275 1275 + */ 1276 1276 + if (i) 1277 1277 + flush_workqueue(nbd->recv_workq); 1278 1278 return -ENOMEM; 1279 1279 } 1280 1280 sk_set_memalloc(config->socks[i]->sock->sk);

+3 -5

drivers/block/xen-blkfront.c

reviewed

··· 151 151 #define BLK_RING_SIZE(info) \ 152 152 __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) 153 153 154 154 - #define BLK_MAX_RING_SIZE \ 155 155 - __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS) 156 156 - 157 154 /* 158 155 * ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 159 156 * characters are enough. Define to 20 to keep consistent with backend. ··· 174 177 unsigned int evtchn, irq; 175 178 struct work_struct work; 176 179 struct gnttab_free_callback callback; 177 177 - struct blk_shadow shadow[BLK_MAX_RING_SIZE]; 178 180 struct list_head indirect_pages; 179 181 struct list_head grants; 180 182 unsigned int persistent_gnts_c; 181 183 unsigned long shadow_free; 182 184 struct blkfront_info *dev_info; 185 185 + struct blk_shadow shadow[]; 183 186 }; 184 187 185 188 /* ··· 1912 1915 info->nr_rings = 1; 1913 1916 1914 1917 info->rinfo = kvcalloc(info->nr_rings, 1915 1915 - sizeof(struct blkfront_ring_info), 1918 1918 + struct_size(info->rinfo, shadow, 1919 1919 + BLK_RING_SIZE(info)), 1916 1920 GFP_KERNEL); 1917 1921 if (!info->rinfo) { 1918 1922 xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");

drivers/md/bcache/bcache.h

reviewed

··· 330 330 */ 331 331 atomic_t has_dirty; 332 332 333 333 + #define BCH_CACHE_READA_ALL 0 334 334 + #define BCH_CACHE_READA_META_ONLY 1 335 335 + unsigned int cache_readahead_policy; 333 336 struct bch_ratelimit writeback_rate; 334 337 struct delayed_work writeback_rate_update; 335 338

+2 -1

drivers/md/bcache/bset.h

reviewed

··· 397 397 398 398 /* Bkey utility code */ 399 399 400 400 - #define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) 400 400 + #define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \ 401 401 + (unsigned int)(i)->keys) 401 402 402 403 static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx) 403 404 {

+2 -1

drivers/md/bcache/journal.c

reviewed

··· 422 422 static void btree_flush_write(struct cache_set *c) 423 423 { 424 424 struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR]; 425 425 - unsigned int i, nr, ref_nr; 425 425 + unsigned int i, nr; 426 426 + int ref_nr; 426 427 atomic_t *fifo_front_p, *now_fifo_front_p; 427 428 size_t mask; 428 429

+12 -5

drivers/md/bcache/request.c

reviewed

··· 379 379 goto skip; 380 380 381 381 /* 382 382 - * Flag for bypass if the IO is for read-ahead or background, 383 383 - * unless the read-ahead request is for metadata 382 382 + * If the bio is for read-ahead or background IO, whether to 383 383 + * bypass it depends on the following: 384 384 + * - If the IO is for metadata, always cache it (no bypass) 385 385 + * - If the IO is not metadata, check dc->cache_readahead_policy: 386 386 + * BCH_CACHE_READA_ALL: cache it (no bypass) 387 387 + * BCH_CACHE_READA_META_ONLY: do not cache it (bypass) 388 388 + * That is, read-ahead requests for metadata always get cached 384 389 * (eg, for gfs2 or xfs). 385 390 */ 386 386 - if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && 387 387 - !(bio->bi_opf & (REQ_META|REQ_PRIO))) 388 388 - goto skip; 391 391 + if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) { 392 392 + if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) && 393 393 + (dc->cache_readahead_policy != BCH_CACHE_READA_ALL)) 394 394 + goto skip; 395 395 + } 389 396 390 397 if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || 391 398 bio_sectors(bio) & (c->sb.block_size - 1)) {

+7 -3

drivers/md/bcache/stats.c

reviewed

··· 109 109 110 110 void bch_cache_accounting_clear(struct cache_accounting *acc) 111 111 { 112 112 - memset(&acc->total.cache_hits, 113 113 - 0, 114 114 - sizeof(struct cache_stats)); 112 112 + acc->total.cache_hits = 0; 113 113 + acc->total.cache_misses = 0; 114 114 + acc->total.cache_bypass_hits = 0; 115 115 + acc->total.cache_bypass_misses = 0; 116 116 + acc->total.cache_readaheads = 0; 117 117 + acc->total.cache_miss_collisions = 0; 118 118 + acc->total.sectors_bypassed = 0; 115 119 } 116 120 117 121 void bch_cache_accounting_destroy(struct cache_accounting *acc)

+16 -5

drivers/md/bcache/super.c

reviewed

··· 609 609 return 0; 610 610 } 611 611 612 612 - static void prio_read(struct cache *ca, uint64_t bucket) 612 612 + static int prio_read(struct cache *ca, uint64_t bucket) 613 613 { 614 614 struct prio_set *p = ca->disk_buckets; 615 615 struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; 616 616 struct bucket *b; 617 617 unsigned int bucket_nr = 0; 618 618 + int ret = -EIO; 618 619 619 620 for (b = ca->buckets; 620 621 b < ca->buckets + ca->sb.nbuckets; ··· 628 627 prio_io(ca, bucket, REQ_OP_READ, 0); 629 628 630 629 if (p->csum != 631 631 - bch_crc64(&p->magic, bucket_bytes(ca) - 8)) 630 630 + bch_crc64(&p->magic, bucket_bytes(ca) - 8)) { 632 631 pr_warn("bad csum reading priorities"); 632 632 + goto out; 633 633 + } 633 634 634 634 - if (p->magic != pset_magic(&ca->sb)) 635 635 + if (p->magic != pset_magic(&ca->sb)) { 635 636 pr_warn("bad magic reading priorities"); 637 637 + goto out; 638 638 + } 636 639 637 640 bucket = p->next_bucket; 638 641 d = p->data; ··· 645 640 b->prio = le16_to_cpu(d->prio); 646 641 b->gen = b->last_gc = d->gen; 647 642 } 643 643 + 644 644 + ret = 0; 645 645 + out: 646 646 + return ret; 648 647 } 649 648 650 649 /* Bcache device */ ··· 1882 1873 j = &list_entry(journal.prev, struct journal_replay, list)->j; 1883 1874 1884 1875 err = "IO error reading priorities"; 1885 1885 - for_each_cache(ca, c, i) 1886 1886 - prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]); 1876 1876 + for_each_cache(ca, c, i) { 1877 1877 + if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev])) 1878 1878 + goto err; 1879 1879 + } 1887 1880 1888 1881 /* 1889 1882 * If prio_read() fails it'll call cache_set_error and we'll

+22

drivers/md/bcache/sysfs.c

reviewed

··· 27 27 NULL 28 28 }; 29 29 30 30 + static const char * const bch_reada_cache_policies[] = { 31 31 + "all", 32 32 + "meta-only", 33 33 + NULL 34 34 + }; 35 35 + 30 36 /* Default is 0 ("auto") */ 31 37 static const char * const bch_stop_on_failure_modes[] = { 32 38 "auto", ··· 106 100 rw_attribute(sequential_cutoff); 107 101 rw_attribute(data_csum); 108 102 rw_attribute(cache_mode); 103 103 + rw_attribute(readahead_cache_policy); 109 104 rw_attribute(stop_when_cache_set_failed); 110 105 rw_attribute(writeback_metadata); 111 106 rw_attribute(writeback_running); ··· 174 167 return bch_snprint_string_list(buf, PAGE_SIZE, 175 168 bch_cache_modes, 176 169 BDEV_CACHE_MODE(&dc->sb)); 170 170 + 171 171 + if (attr == &sysfs_readahead_cache_policy) 172 172 + return bch_snprint_string_list(buf, PAGE_SIZE, 173 173 + bch_reada_cache_policies, 174 174 + dc->cache_readahead_policy); 177 175 178 176 if (attr == &sysfs_stop_when_cache_set_failed) 179 177 return bch_snprint_string_list(buf, PAGE_SIZE, ··· 365 353 } 366 354 } 367 355 356 356 + if (attr == &sysfs_readahead_cache_policy) { 357 357 + v = __sysfs_match_string(bch_reada_cache_policies, -1, buf); 358 358 + if (v < 0) 359 359 + return v; 360 360 + 361 361 + if ((unsigned int) v != dc->cache_readahead_policy) 362 362 + dc->cache_readahead_policy = v; 363 363 + } 364 364 + 368 365 if (attr == &sysfs_stop_when_cache_set_failed) { 369 366 v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf); 370 367 if (v < 0) ··· 488 467 &sysfs_data_csum, 489 468 #endif 490 469 &sysfs_cache_mode, 470 470 + &sysfs_readahead_cache_policy, 491 471 &sysfs_stop_when_cache_set_failed, 492 472 &sysfs_writeback_metadata, 493 473 &sysfs_writeback_running,

+8 -15

drivers/nvme/host/pci.c

reviewed

··· 167 167 /* only used for poll queues: */ 168 168 spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; 169 169 volatile struct nvme_completion *cqes; 170 170 - struct blk_mq_tags **tags; 171 170 dma_addr_t sq_dma_addr; 172 171 dma_addr_t cq_dma_addr; 173 172 u32 __iomem *q_db; ··· 375 376 376 377 WARN_ON(hctx_idx != 0); 377 378 WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); 378 378 - WARN_ON(nvmeq->tags); 379 379 380 380 hctx->driver_data = nvmeq; 381 381 - nvmeq->tags = &dev->admin_tagset.tags[0]; 382 381 return 0; 383 383 - } 384 384 - 385 385 - static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) 386 386 - { 387 387 - struct nvme_queue *nvmeq = hctx->driver_data; 388 388 - 389 389 - nvmeq->tags = NULL; 390 382 } 391 383 392 384 static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, ··· 385 395 { 386 396 struct nvme_dev *dev = data; 387 397 struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; 388 388 - 389 389 - if (!nvmeq->tags) 390 390 - nvmeq->tags = &dev->tagset.tags[hctx_idx]; 391 398 392 399 WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); 393 400 hctx->driver_data = nvmeq; ··· 935 948 writel(head, nvmeq->q_db + nvmeq->dev->db_stride); 936 949 } 937 950 951 951 + static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) 952 952 + { 953 953 + if (!nvmeq->qid) 954 954 + return nvmeq->dev->admin_tagset.tags[0]; 955 955 + return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; 956 956 + } 957 957 + 938 958 static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) 939 959 { 940 960 volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; ··· 966 972 return; 967 973 } 968 974 969 969 - req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); 975 975 + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); 970 976 trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); 971 977 nvme_end_request(req, cqe->status, cqe->result); 972 978 } ··· 1566 1572 .queue_rq = nvme_queue_rq, 1567 
1573 .complete = nvme_pci_complete_rq, 1568 1574 .init_hctx = nvme_admin_init_hctx, 1569 1569 - .exit_hctx = nvme_admin_exit_hctx, 1570 1575 .init_request = nvme_init_request, 1571 1576 .timeout = nvme_timeout, 1572 1577 };

+51 -29

drivers/nvme/target/core.c

reviewed

··· 129 129 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); 130 130 } 131 131 132 132 - static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) 132 132 + static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) 133 133 { 134 134 - struct nvmet_req *req; 135 135 - 136 136 - while (1) { 137 137 - mutex_lock(&ctrl->lock); 138 138 - if (!ctrl->nr_async_event_cmds) { 139 139 - mutex_unlock(&ctrl->lock); 140 140 - return; 141 141 - } 142 142 - 143 143 - req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 144 144 - mutex_unlock(&ctrl->lock); 145 145 - nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); 146 146 - } 147 147 - } 148 148 - 149 149 - static void nvmet_async_event_work(struct work_struct *work) 150 150 - { 151 151 - struct nvmet_ctrl *ctrl = 152 152 - container_of(work, struct nvmet_ctrl, async_event_work); 153 134 struct nvmet_async_event *aen; 154 135 struct nvmet_req *req; 155 136 ··· 140 159 struct nvmet_async_event, entry); 141 160 if (!aen || !ctrl->nr_async_event_cmds) { 142 161 mutex_unlock(&ctrl->lock); 143 143 - return; 162 162 + break; 144 163 } 145 164 146 165 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 147 147 - nvmet_set_result(req, nvmet_async_event_result(aen)); 166 166 + if (status == 0) 167 167 + nvmet_set_result(req, nvmet_async_event_result(aen)); 148 168 149 169 list_del(&aen->entry); 150 170 kfree(aen); 151 171 152 172 mutex_unlock(&ctrl->lock); 153 153 - nvmet_req_complete(req, 0); 173 173 + nvmet_req_complete(req, status); 154 174 } 175 175 + } 176 176 + 177 177 + static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) 178 178 + { 179 179 + struct nvmet_req *req; 180 180 + 181 181 + mutex_lock(&ctrl->lock); 182 182 + while (ctrl->nr_async_event_cmds) { 183 183 + req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 184 184 + mutex_unlock(&ctrl->lock); 185 185 + nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); 186 186 + mutex_lock(&ctrl->lock); 
187 187 + } 188 188 + mutex_unlock(&ctrl->lock); 189 189 + } 190 190 + 191 191 + static void nvmet_async_event_work(struct work_struct *work) 192 192 + { 193 193 + struct nvmet_ctrl *ctrl = 194 194 + container_of(work, struct nvmet_ctrl, async_event_work); 195 195 + 196 196 + nvmet_async_events_process(ctrl, 0); 155 197 } 156 198 157 199 void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, ··· 559 555 } else { 560 556 struct nvmet_ns *old; 561 557 562 562 - list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) { 558 558 + list_for_each_entry_rcu(old, &subsys->namespaces, dev_link, 559 559 + lockdep_is_held(&subsys->lock)) { 563 560 BUG_ON(ns->nsid == old->nsid); 564 561 if (ns->nsid < old->nsid) 565 562 break; ··· 757 752 758 753 void nvmet_sq_destroy(struct nvmet_sq *sq) 759 754 { 755 755 + u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; 756 756 + struct nvmet_ctrl *ctrl = sq->ctrl; 757 757 + 760 758 /* 761 759 * If this is the admin queue, complete all AERs so that our 762 760 * queue doesn't have outstanding requests on it. 
763 761 */ 764 764 - if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) 765 765 - nvmet_async_events_free(sq->ctrl); 762 762 + if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) { 763 763 + nvmet_async_events_process(ctrl, status); 764 764 + nvmet_async_events_free(ctrl); 765 765 + } 766 766 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); 767 767 wait_for_completion(&sq->confirm_done); 768 768 wait_for_completion(&sq->free_done); 769 769 percpu_ref_exit(&sq->ref); 770 770 771 771 - if (sq->ctrl) { 772 772 - nvmet_ctrl_put(sq->ctrl); 771 771 + if (ctrl) { 772 772 + nvmet_ctrl_put(ctrl); 773 773 sq->ctrl = NULL; /* allows reusing the queue later */ 774 774 } 775 775 } ··· 947 937 return true; 948 938 } 949 939 EXPORT_SYMBOL_GPL(nvmet_check_data_len); 940 940 + 941 941 + bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) 942 942 + { 943 943 + if (unlikely(data_len > req->transfer_len)) { 944 944 + req->error_loc = offsetof(struct nvme_common_command, dptr); 945 945 + nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); 946 946 + return false; 947 947 + } 948 948 + 949 949 + return true; 950 950 + } 950 951 951 952 int nvmet_req_alloc_sgl(struct nvmet_req *req) 952 953 { ··· 1193 1172 1194 1173 ctrl->p2p_client = get_device(req->p2p_client); 1195 1174 1196 1196 - list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) 1175 1175 + list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link, 1176 1176 + lockdep_is_held(&ctrl->subsys->lock)) 1197 1177 nvmet_p2pmem_ns_add_p2p(ctrl, ns); 1198 1178 } 1199 1179

+10 -5

drivers/nvme/target/fabrics-cmd.c

reviewed

··· 109 109 u16 qid = le16_to_cpu(c->qid); 110 110 u16 sqsize = le16_to_cpu(c->sqsize); 111 111 struct nvmet_ctrl *old; 112 112 + u16 ret; 112 113 113 114 old = cmpxchg(&req->sq->ctrl, NULL, ctrl); 114 115 if (old) { ··· 120 119 if (!sqsize) { 121 120 pr_warn("queue size zero!\n"); 122 121 req->error_loc = offsetof(struct nvmf_connect_command, sqsize); 123 123 - return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; 122 122 + ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; 123 123 + goto err; 124 124 } 125 125 126 126 /* note: convert queue size from 0's-based value to 1's-based value */ ··· 134 132 } 135 133 136 134 if (ctrl->ops->install_queue) { 137 137 - u16 ret = ctrl->ops->install_queue(req->sq); 138 138 - 135 135 + ret = ctrl->ops->install_queue(req->sq); 139 136 if (ret) { 140 137 pr_err("failed to install queue %d cntlid %d ret %x\n", 141 141 - qid, ret, ctrl->cntlid); 142 142 - return ret; 138 138 + qid, ctrl->cntlid, ret); 139 139 + goto err; 143 140 } 144 141 } 145 142 146 143 return 0; 144 144 + 145 145 + err: 146 146 + req->sq->ctrl = NULL; 147 147 + return ret; 147 148 } 148 149 149 150 static void nvmet_execute_admin_connect(struct nvmet_req *req)

+1 -1

drivers/nvme/target/io-cmd-bdev.c

reviewed

··· 280 280 281 281 static void nvmet_bdev_execute_dsm(struct nvmet_req *req) 282 282 { 283 283 - if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) 283 283 + if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) 284 284 return; 285 285 286 286 switch (le32_to_cpu(req->cmd->dsm.attributes)) {

+1 -1

drivers/nvme/target/io-cmd-file.c

reviewed

··· 336 336 337 337 static void nvmet_file_execute_dsm(struct nvmet_req *req) 338 338 { 339 339 - if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) 339 339 + if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) 340 340 return; 341 341 INIT_WORK(&req->f.work, nvmet_file_dsm_work); 342 342 schedule_work(&req->f.work);

drivers/nvme/target/nvmet.h

reviewed

··· 374 374 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); 375 375 void nvmet_req_uninit(struct nvmet_req *req); 376 376 bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); 377 377 + bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); 377 378 void nvmet_req_complete(struct nvmet_req *req, u16 status); 378 379 int nvmet_req_alloc_sgl(struct nvmet_req *req); 379 380 void nvmet_req_free_sgl(struct nvmet_req *req);