Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-5.14/drivers-2021-06-29' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
"Pretty calm round, mostly just NVMe and a bit of MD:

- NVMe updates (via Christoph)
- improve the APST configuration algorithm (Alexey Bogoslavsky)
- look for StorageD3Enable on companion ACPI device
(Mario Limonciello)
- allow selecting the network interface for TCP connections
(Martin Belanger)
- misc cleanups (Amit Engel, Chaitanya Kulkarni, Colin Ian King,
Christoph)
- move the ACPI StorageD3 code to drivers/acpi/ and add quirks
for certain AMD CPUs (Mario Limonciello)
- zoned device support for nvmet (Chaitanya Kulkarni)
- fix the rules for changing the serial number in nvmet
(Noam Gottlieb)
- various small fixes and cleanups (Dan Carpenter, JK Kim,
Chaitanya Kulkarni, Hannes Reinecke, Wesley Sheng, Geert
Uytterhoeven, Daniel Wagner)

- MD updates (via Song)
- iostats rewrite (Guoqing Jiang)
- raid5 lock contention optimization (Gal Ofri)

- Fall through warning fix (Gustavo)

- Misc fixes (Gustavo, Jiapeng)"

* tag 'for-5.14/drivers-2021-06-29' of git://git.kernel.dk/linux-block: (78 commits)
nvmet: use NVMET_MAX_NAMESPACES to set nn value
loop: Fix missing discard support when using LOOP_CONFIGURE
nvme.h: add missing nvme_lba_range_type endianness annotations
nvme: remove zeroout memset call for struct
nvme-pci: remove zeroout memset call for struct
nvmet: remove zeroout memset call for struct
nvmet: add ZBD over ZNS backend support
nvmet: add Command Set Identifier support
nvmet: add nvmet_req_bio put helper for backends
nvmet: add req cns error complete helper
block: export blk_next_bio()
nvmet: remove local variable
nvmet: use nvme status value directly
nvmet: use u32 type for the local variable nsid
nvmet: use u32 for nvmet_subsys max_nsid
nvmet: use req->cmd directly in file-ns fast path
nvmet: use req->cmd directly in bdev-ns fast path
nvmet: make ver stable once connection established
nvmet: allow mn change if subsys not discovered
nvmet: make sn stable once connection was established
...

+1467 -519
+1
block/blk-lib.c
··· 21 21 22 22 return new; 23 23 } 24 + EXPORT_SYMBOL_GPL(blk_next_bio); 24 25 25 26 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, 26 27 sector_t nr_sects, gfp_t gfp_mask, int flags,
+32
drivers/acpi/device_pm.c
··· 1368 1368 return 1; 1369 1369 } 1370 1370 EXPORT_SYMBOL_GPL(acpi_dev_pm_attach); 1371 + 1372 + /** 1373 + * acpi_storage_d3 - Check if D3 should be used in the suspend path 1374 + * @dev: Device to check 1375 + * 1376 + * Return %true if the platform firmware wants @dev to be programmed 1377 + * into D3hot or D3cold (if supported) in the suspend path, or %false 1378 + * when there is no specific preference. On some platforms, if this 1379 + * hint is ignored, @dev may remain unresponsive after suspending the 1380 + * platform as a whole. 1381 + * 1382 + * Although the property has storage in the name it actually is 1383 + * applied to the PCIe slot and plugging in a non-storage device the 1384 + * same platform restrictions will likely apply. 1385 + */ 1386 + bool acpi_storage_d3(struct device *dev) 1387 + { 1388 + struct acpi_device *adev = ACPI_COMPANION(dev); 1389 + u8 val; 1390 + 1391 + if (force_storage_d3()) 1392 + return true; 1393 + 1394 + if (!adev) 1395 + return false; 1396 + if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable", 1397 + &val)) 1398 + return false; 1399 + return val == 1; 1400 + } 1401 + EXPORT_SYMBOL_GPL(acpi_storage_d3); 1402 + 1371 1403 #endif /* CONFIG_PM */
+9
drivers/acpi/internal.h
··· 234 234 static inline void suspend_nvs_restore(void) {} 235 235 #endif 236 236 237 + #ifdef CONFIG_X86 238 + bool force_storage_d3(void); 239 + #else 240 + static inline bool force_storage_d3(void) 241 + { 242 + return false; 243 + } 244 + #endif 245 + 237 246 /*-------------------------------------------------------------------------- 238 247 Device properties 239 248 -------------------------------------------------------------------------- */
+25
drivers/acpi/x86/utils.c
··· 135 135 136 136 return ret; 137 137 } 138 + 139 + /* 140 + * AMD systems from Renoir and Lucienne *require* that the NVME controller 141 + * is put into D3 over a Modern Standby / suspend-to-idle cycle. 142 + * 143 + * This is "typically" accomplished using the `StorageD3Enable` 144 + * property in the _DSD that is checked via the `acpi_storage_d3` function 145 + * but this property was introduced after many of these systems launched 146 + * and most OEM systems don't have it in their BIOS. 147 + * 148 + * The Microsoft documentation for StorageD3Enable mentioned that Windows has 149 + * a hardcoded allowlist for D3 support, which was used for these platforms. 150 + * 151 + * This allows quirking on Linux in a similar fashion. 152 + */ 153 + static const struct x86_cpu_id storage_d3_cpu_ids[] = { 154 + X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL), /* Renoir */ 155 + X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */ 156 + {} 157 + }; 158 + 159 + bool force_storage_d3(void) 160 + { 161 + return x86_match_cpu(storage_d3_cpu_ids); 162 + }
+1 -3
drivers/block/aoe/aoechr.c
··· 140 140 } 141 141 142 142 mp = kmemdup(msg, n, GFP_ATOMIC); 143 - if (mp == NULL) { 144 - printk(KERN_ERR "aoe: allocation failure, len=%ld\n", n); 143 + if (!mp) 145 144 goto bail; 146 - } 147 145 148 146 em->msg = mp; 149 147 em->flags |= EMFL_VALID;
+6 -16
drivers/block/drbd/drbd_receiver.c
··· 3770 3770 } 3771 3771 3772 3772 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); 3773 - if (!new_net_conf) { 3774 - drbd_err(connection, "Allocation of new net_conf failed\n"); 3773 + if (!new_net_conf) 3775 3774 goto disconnect; 3776 - } 3777 3775 3778 3776 mutex_lock(&connection->data.mutex); 3779 3777 mutex_lock(&connection->resource->conf_update); ··· 4018 4020 4019 4021 if (verify_tfm || csums_tfm) { 4020 4022 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); 4021 - if (!new_net_conf) { 4022 - drbd_err(device, "Allocation of new net_conf failed\n"); 4023 + if (!new_net_conf) 4023 4024 goto disconnect; 4024 - } 4025 4025 4026 4026 *new_net_conf = *old_net_conf; 4027 4027 ··· 4157 4161 4158 4162 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 4159 4163 if (!new_disk_conf) { 4160 - drbd_err(device, "Allocation of new disk_conf failed\n"); 4161 4164 put_ldev(device); 4162 4165 return -ENOMEM; 4163 4166 } ··· 4283 4288 device = peer_device->device; 4284 4289 4285 4290 p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO); 4286 - if (!p_uuid) { 4287 - drbd_err(device, "kmalloc of p_uuid failed\n"); 4291 + if (!p_uuid) 4288 4292 return false; 4289 - } 4290 4293 4291 4294 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) 4292 4295 p_uuid[i] = be64_to_cpu(p->uuid[i]); ··· 5477 5484 } 5478 5485 5479 5486 peers_ch = kmalloc(pi.size, GFP_NOIO); 5480 - if (peers_ch == NULL) { 5481 - drbd_err(connection, "kmalloc of peers_ch failed\n"); 5487 + if (!peers_ch) { 5482 5488 rv = -1; 5483 5489 goto fail; 5484 5490 } ··· 5496 5504 5497 5505 resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm); 5498 5506 response = kmalloc(resp_size, GFP_NOIO); 5499 - if (response == NULL) { 5500 - drbd_err(connection, "kmalloc of response failed\n"); 5507 + if (!response) { 5501 5508 rv = -1; 5502 5509 goto fail; 5503 5510 } ··· 5543 5552 } 5544 5553 5545 5554 right_response = kmalloc(resp_size, GFP_NOIO); 5546 - if (right_response 
== NULL) { 5547 - drbd_err(connection, "kmalloc of right_response failed\n"); 5555 + if (!right_response) { 5548 5556 rv = -1; 5549 5557 goto fail; 5550 5558 }
+1 -1
drivers/block/floppy.c
··· 2123 2123 switch (interpret_errors()) { 2124 2124 case 1: 2125 2125 cont->error(); 2126 + break; 2126 2127 case 2: 2127 2128 break; 2128 2129 case 0: ··· 2331 2330 if (!drive_state[current_drive].first_read_date) 2332 2331 drive_state[current_drive].first_read_date = jiffies; 2333 2332 2334 - nr_sectors = 0; 2335 2333 ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4); 2336 2334 2337 2335 if (reply_buffer[ST1] & ST1_EOC)
+1
drivers/block/loop.c
··· 1234 1234 blk_queue_physical_block_size(lo->lo_queue, bsize); 1235 1235 blk_queue_io_min(lo->lo_queue, bsize); 1236 1236 1237 + loop_config_discard(lo); 1237 1238 loop_update_rotational(lo); 1238 1239 loop_update_dio(lo); 1239 1240 loop_sysfs_init(lo);
+5 -21
drivers/block/mtip32xx/mtip32xx.c
··· 2238 2238 static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, 2239 2239 size_t len, loff_t *offset) 2240 2240 { 2241 - struct driver_data *dd = (struct driver_data *)f->private_data; 2242 2241 int size = *offset; 2243 2242 char *buf; 2244 2243 int rv = 0; ··· 2246 2247 return 0; 2247 2248 2248 2249 buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); 2249 - if (!buf) { 2250 - dev_err(&dd->pdev->dev, 2251 - "Memory allocation: status buffer\n"); 2250 + if (!buf) 2252 2251 return -ENOMEM; 2253 - } 2254 2252 2255 2253 size += show_device_status(NULL, buf); 2256 2254 ··· 2273 2277 return 0; 2274 2278 2275 2279 buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); 2276 - if (!buf) { 2277 - dev_err(&dd->pdev->dev, 2278 - "Memory allocation: register buffer\n"); 2280 + if (!buf) 2279 2281 return -ENOMEM; 2280 - } 2281 2282 2282 2283 size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); 2283 2284 ··· 2336 2343 return 0; 2337 2344 2338 2345 buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); 2339 - if (!buf) { 2340 - dev_err(&dd->pdev->dev, 2341 - "Memory allocation: flag buffer\n"); 2346 + if (!buf) 2342 2347 return -ENOMEM; 2343 - } 2344 2348 2345 2349 size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", 2346 2350 dd->port->flags); ··· 2874 2884 2875 2885 dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL, 2876 2886 dd->numa_node); 2877 - if (!dd->port) { 2878 - dev_err(&dd->pdev->dev, 2879 - "Memory allocation: port structure\n"); 2887 + if (!dd->port) 2880 2888 return -ENOMEM; 2881 - } 2882 2889 2883 2890 /* Continue workqueue setup */ 2884 2891 for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) ··· 3989 4002 cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id()); 3990 4003 3991 4004 dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); 3992 - if (dd == NULL) { 3993 - dev_err(&pdev->dev, 3994 - "Unable to allocate memory for driver data\n"); 4005 + if (!dd) 3995 4006 return -ENOMEM; 3996 - } 3997 4007 3998 4008 /* Attach the 
private data to this PCI device. */ 3999 4009 pci_set_drvdata(pdev, dd);
+2 -4
drivers/block/rsxx/dma.c
··· 74 74 struct rsxx_dma *dma; 75 75 }; 76 76 77 - #define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \ 78 - (sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS)) 79 - 80 77 struct dma_tracker_list { 81 78 spinlock_t lock; 82 79 int head; ··· 805 808 806 809 memset(&ctrl->stats, 0, sizeof(ctrl->stats)); 807 810 808 - ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); 811 + ctrl->trackers = vmalloc(struct_size(ctrl->trackers, list, 812 + RSXX_MAX_OUTSTANDING_CMDS)); 809 813 if (!ctrl->trackers) 810 814 return -ENOMEM; 811 815
+1 -2
drivers/block/sunvdc.c
··· 981 981 } 982 982 983 983 port = kzalloc(sizeof(*port), GFP_KERNEL); 984 - err = -ENOMEM; 985 984 if (!port) { 986 - printk(KERN_ERR PFX "Cannot allocate vdc_port.\n"); 985 + err = -ENOMEM; 987 986 goto err_out_release_mdesc; 988 987 } 989 988
-2
drivers/block/sx8.c
··· 1420 1420 1421 1421 host = kzalloc(sizeof(*host), GFP_KERNEL); 1422 1422 if (!host) { 1423 - printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n", 1424 - pci_name(pdev)); 1425 1423 rc = -ENOMEM; 1426 1424 goto err_out_regions; 1427 1425 }
+2 -8
drivers/block/z2ram.c
··· 236 236 237 237 case Z2MINOR_Z2ONLY: 238 238 z2ram_map = kmalloc(max_z2_map, GFP_KERNEL); 239 - if (z2ram_map == NULL) { 240 - printk(KERN_ERR DEVICE_NAME 241 - ": cannot get mem for z2ram_map\n"); 239 + if (!z2ram_map) 242 240 goto err_out; 243 - } 244 241 245 242 get_z2ram(); 246 243 ··· 250 253 251 254 case Z2MINOR_CHIPONLY: 252 255 z2ram_map = kmalloc(max_chip_map, GFP_KERNEL); 253 - if (z2ram_map == NULL) { 254 - printk(KERN_ERR DEVICE_NAME 255 - ": cannot get mem for z2ram_map\n"); 256 + if (!z2ram_map) 256 257 goto err_out; 257 - } 258 258 259 259 get_chipram(); 260 260
+3 -3
drivers/md/Kconfig
··· 47 47 If unsure, say Y. 48 48 49 49 config MD_LINEAR 50 - tristate "Linear (append) mode" 50 + tristate "Linear (append) mode (deprecated)" 51 51 depends on BLK_DEV_MD 52 52 help 53 53 If you say Y here, then your multiple devices driver will be able to ··· 158 158 If unsure, say Y. 159 159 160 160 config MD_MULTIPATH 161 - tristate "Multipath I/O support" 161 + tristate "Multipath I/O support (deprecated)" 162 162 depends on BLK_DEV_MD 163 163 help 164 164 MD_MULTIPATH provides a simple multi-path personality for use ··· 169 169 If unsure, say N. 170 170 171 171 config MD_FAULTY 172 - tristate "Faulty test module for MD" 172 + tristate "Faulty test module for MD (deprecated)" 173 173 depends on BLK_DEV_MD 174 174 help 175 175 The "faulty" module allows for a block device that occasionally returns
+1 -1
drivers/md/md-bitmap.c
··· 2616 2616 &max_backlog_used.attr, 2617 2617 NULL 2618 2618 }; 2619 - struct attribute_group md_bitmap_group = { 2619 + const struct attribute_group md_bitmap_group = { 2620 2620 .name = "bitmap", 2621 2621 .attrs = md_bitmap_attrs, 2622 2622 };
+1 -1
drivers/md/md-faulty.c
··· 357 357 module_init(raid_init); 358 358 module_exit(raid_exit); 359 359 MODULE_LICENSE("GPL"); 360 - MODULE_DESCRIPTION("Fault injection personality for MD"); 360 + MODULE_DESCRIPTION("Fault injection personality for MD (deprecated)"); 361 361 MODULE_ALIAS("md-personality-10"); /* faulty */ 362 362 MODULE_ALIAS("md-faulty"); 363 363 MODULE_ALIAS("md-level--5");
+1 -1
drivers/md/md-linear.c
··· 312 312 module_init(linear_init); 313 313 module_exit(linear_exit); 314 314 MODULE_LICENSE("GPL"); 315 - MODULE_DESCRIPTION("Linear device concatenation personality for MD"); 315 + MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)"); 316 316 MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ 317 317 MODULE_ALIAS("md-linear"); 318 318 MODULE_ALIAS("md-level--1");
+1 -1
drivers/md/md-multipath.c
··· 471 471 module_init(multipath_init); 472 472 module_exit(multipath_exit); 473 473 MODULE_LICENSE("GPL"); 474 - MODULE_DESCRIPTION("simple multi-path personality for MD"); 474 + MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)"); 475 475 MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ 476 476 MODULE_ALIAS("md-multipath"); 477 477 MODULE_ALIAS("md-level--4");
+65 -51
drivers/md/md.c
··· 441 441 } 442 442 EXPORT_SYMBOL(md_handle_request); 443 443 444 - struct md_io { 445 - struct mddev *mddev; 446 - bio_end_io_t *orig_bi_end_io; 447 - void *orig_bi_private; 448 - struct block_device *orig_bi_bdev; 449 - unsigned long start_time; 450 - }; 451 - 452 - static void md_end_io(struct bio *bio) 453 - { 454 - struct md_io *md_io = bio->bi_private; 455 - struct mddev *mddev = md_io->mddev; 456 - 457 - bio_end_io_acct_remapped(bio, md_io->start_time, md_io->orig_bi_bdev); 458 - 459 - bio->bi_end_io = md_io->orig_bi_end_io; 460 - bio->bi_private = md_io->orig_bi_private; 461 - 462 - mempool_free(md_io, &mddev->md_io_pool); 463 - 464 - if (bio->bi_end_io) 465 - bio->bi_end_io(bio); 466 - } 467 - 468 444 static blk_qc_t md_submit_bio(struct bio *bio) 469 445 { 470 446 const int rw = bio_data_dir(bio); ··· 463 487 bio->bi_status = BLK_STS_IOERR; 464 488 bio_endio(bio); 465 489 return BLK_QC_T_NONE; 466 - } 467 - 468 - if (bio->bi_end_io != md_end_io) { 469 - struct md_io *md_io; 470 - 471 - md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO); 472 - md_io->mddev = mddev; 473 - md_io->orig_bi_end_io = bio->bi_end_io; 474 - md_io->orig_bi_private = bio->bi_private; 475 - md_io->orig_bi_bdev = bio->bi_bdev; 476 - 477 - bio->bi_end_io = md_end_io; 478 - bio->bi_private = md_io; 479 - 480 - md_io->start_time = bio_start_io_acct(bio); 481 490 } 482 491 483 492 /* bio could be mergeable after passing to underlayer */ ··· 785 824 return ERR_PTR(error); 786 825 } 787 826 788 - static struct attribute_group md_redundancy_group; 827 + static const struct attribute_group md_redundancy_group; 789 828 790 829 void mddev_unlock(struct mddev *mddev) 791 830 { ··· 802 841 * test it under the same mutex to ensure its correct value 803 842 * is seen. 
804 843 */ 805 - struct attribute_group *to_remove = mddev->to_remove; 844 + const struct attribute_group *to_remove = mddev->to_remove; 806 845 mddev->to_remove = NULL; 807 846 mddev->sysfs_active = 1; 808 847 mutex_unlock(&mddev->reconfig_mutex); ··· 2340 2379 bdev_get_integrity(reference->bdev)); 2341 2380 2342 2381 pr_debug("md: data integrity enabled on %s\n", mdname(mddev)); 2343 - if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) { 2382 + if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) || 2383 + (mddev->level != 1 && mddev->level != 10 && 2384 + bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) { 2385 + /* 2386 + * No need to handle the failure of bioset_integrity_create, 2387 + * because the function is called by md_run() -> pers->run(), 2388 + * md_run calls bioset_exit -> bioset_integrity_free in case 2389 + * of failure case. 2390 + */ 2344 2391 pr_err("md: failed to create integrity pool for %s\n", 2345 2392 mdname(mddev)); 2346 2393 return -EINVAL; ··· 5507 5538 &md_degraded.attr, 5508 5539 NULL, 5509 5540 }; 5510 - static struct attribute_group md_redundancy_group = { 5541 + static const struct attribute_group md_redundancy_group = { 5511 5542 .name = NULL, 5512 5543 .attrs = md_redundancy_attrs, 5513 5544 }; ··· 5575 5606 5576 5607 bioset_exit(&mddev->bio_set); 5577 5608 bioset_exit(&mddev->sync_set); 5578 - mempool_exit(&mddev->md_io_pool); 5609 + if (mddev->level != 1 && mddev->level != 10) 5610 + bioset_exit(&mddev->io_acct_set); 5579 5611 kfree(mddev); 5580 5612 } 5581 5613 ··· 5672 5702 * Creating /dev/mdNNN via "newarray", so adjust hold_active. 
5673 5703 */ 5674 5704 mddev->hold_active = UNTIL_STOP; 5675 - 5676 - error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE, 5677 - sizeof(struct md_io)); 5678 - if (error) 5679 - goto abort; 5680 5705 5681 5706 error = -ENOMEM; 5682 5707 disk = blk_alloc_disk(NUMA_NO_NODE); ··· 5865 5900 if (!bioset_initialized(&mddev->sync_set)) { 5866 5901 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); 5867 5902 if (err) 5868 - return err; 5903 + goto exit_bio_set; 5904 + } 5905 + if (mddev->level != 1 && mddev->level != 10 && 5906 + !bioset_initialized(&mddev->io_acct_set)) { 5907 + err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE, 5908 + offsetof(struct md_io_acct, bio_clone), 0); 5909 + if (err) 5910 + goto exit_sync_set; 5869 5911 } 5870 5912 5871 5913 spin_lock(&pers_lock); ··· 6000 6028 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue); 6001 6029 else 6002 6030 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue); 6031 + blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue); 6003 6032 } 6004 6033 if (pers->sync_request) { 6005 6034 if (mddev->kobj.sd && ··· 6050 6077 module_put(pers->owner); 6051 6078 md_bitmap_destroy(mddev); 6052 6079 abort: 6053 - bioset_exit(&mddev->bio_set); 6080 + if (mddev->level != 1 && mddev->level != 10) 6081 + bioset_exit(&mddev->io_acct_set); 6082 + exit_sync_set: 6054 6083 bioset_exit(&mddev->sync_set); 6084 + exit_bio_set: 6085 + bioset_exit(&mddev->bio_set); 6055 6086 return err; 6056 6087 } 6057 6088 EXPORT_SYMBOL_GPL(md_run); ··· 6279 6302 __md_stop(mddev); 6280 6303 bioset_exit(&mddev->bio_set); 6281 6304 bioset_exit(&mddev->sync_set); 6305 + if (mddev->level != 1 && mddev->level != 10) 6306 + bioset_exit(&mddev->io_acct_set); 6282 6307 } 6283 6308 6284 6309 EXPORT_SYMBOL_GPL(md_stop); ··· 8584 8605 submit_bio_noacct(discard_bio); 8585 8606 } 8586 8607 EXPORT_SYMBOL_GPL(md_submit_discard_bio); 8608 + 8609 + static void md_end_io_acct(struct bio *bio) 8610 + { 8611 + struct md_io_acct 
*md_io_acct = bio->bi_private; 8612 + struct bio *orig_bio = md_io_acct->orig_bio; 8613 + 8614 + orig_bio->bi_status = bio->bi_status; 8615 + 8616 + bio_end_io_acct(orig_bio, md_io_acct->start_time); 8617 + bio_put(bio); 8618 + bio_endio(orig_bio); 8619 + } 8620 + 8621 + /* 8622 + * Used by personalities that don't already clone the bio and thus can't 8623 + * easily add the timestamp to their extended bio structure. 8624 + */ 8625 + void md_account_bio(struct mddev *mddev, struct bio **bio) 8626 + { 8627 + struct md_io_acct *md_io_acct; 8628 + struct bio *clone; 8629 + 8630 + if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue)) 8631 + return; 8632 + 8633 + clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set); 8634 + md_io_acct = container_of(clone, struct md_io_acct, bio_clone); 8635 + md_io_acct->orig_bio = *bio; 8636 + md_io_acct->start_time = bio_start_io_acct(*bio); 8637 + 8638 + clone->bi_end_io = md_end_io_acct; 8639 + clone->bi_private = md_io_acct; 8640 + *bio = clone; 8641 + } 8642 + EXPORT_SYMBOL_GPL(md_account_bio); 8587 8643 8588 8644 /* md_allow_write(mddev) 8589 8645 * Calling this ensures that the array is marked 'active' so that writes
+10 -3
drivers/md/md.h
··· 481 481 atomic_t max_corr_read_errors; /* max read retries */ 482 482 struct list_head all_mddevs; 483 483 484 - struct attribute_group *to_remove; 484 + const struct attribute_group *to_remove; 485 485 486 486 struct bio_set bio_set; 487 487 struct bio_set sync_set; /* for sync operations like 488 488 * metadata and bitmap writes 489 489 */ 490 - mempool_t md_io_pool; 490 + struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */ 491 491 492 492 /* Generic flush handling. 493 493 * The last to finish preflush schedules a worker to submit ··· 613 613 ssize_t (*show)(struct mddev *, char *); 614 614 ssize_t (*store)(struct mddev *, const char *, size_t); 615 615 }; 616 - extern struct attribute_group md_bitmap_group; 616 + extern const struct attribute_group md_bitmap_group; 617 617 618 618 static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name) 619 619 { ··· 684 684 void *private; 685 685 }; 686 686 687 + struct md_io_acct { 688 + struct bio *orig_bio; 689 + unsigned long start_time; 690 + struct bio bio_clone; 691 + }; 692 + 687 693 #define THREAD_WAKEUP 0 688 694 689 695 static inline void safe_put_page(struct page *p) ··· 721 715 extern void md_finish_reshape(struct mddev *mddev); 722 716 void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, 723 717 struct bio *bio, sector_t start, sector_t size); 718 + void md_account_bio(struct mddev *mddev, struct bio **bio); 724 719 725 720 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); 726 721 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
+3
drivers/md/raid0.c
··· 546 546 bio = split; 547 547 } 548 548 549 + if (bio->bi_pool != &mddev->bio_set) 550 + md_account_bio(mddev, &bio); 551 + 549 552 orig_sector = sector; 550 553 zone = find_zone(mddev->private, &sector); 551 554 switch (conf->layout) {
+11 -4
drivers/md/raid1.c
··· 300 300 if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) 301 301 bio->bi_status = BLK_STS_IOERR; 302 302 303 + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) 304 + bio_end_io_acct(bio, r1_bio->start_time); 303 305 bio_endio(bio); 304 306 } 305 307 ··· 1212 1210 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); 1213 1211 int max_sectors; 1214 1212 int rdisk; 1215 - bool print_msg = !!r1_bio; 1213 + bool r1bio_existed = !!r1_bio; 1216 1214 char b[BDEVNAME_SIZE]; 1217 1215 1218 1216 /* ··· 1222 1220 */ 1223 1221 gfp_t gfp = r1_bio ? (GFP_NOIO | __GFP_HIGH) : GFP_NOIO; 1224 1222 1225 - if (print_msg) { 1223 + if (r1bio_existed) { 1226 1224 /* Need to get the block device name carefully */ 1227 1225 struct md_rdev *rdev; 1228 1226 rcu_read_lock(); ··· 1254 1252 1255 1253 if (rdisk < 0) { 1256 1254 /* couldn't find anywhere to read from */ 1257 - if (print_msg) { 1255 + if (r1bio_existed) { 1258 1256 pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n", 1259 1257 mdname(mddev), 1260 1258 b, ··· 1265 1263 } 1266 1264 mirror = conf->mirrors + rdisk; 1267 1265 1268 - if (print_msg) 1266 + if (r1bio_existed) 1269 1267 pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n", 1270 1268 mdname(mddev), 1271 1269 (unsigned long long)r1_bio->sector, ··· 1293 1291 } 1294 1292 1295 1293 r1_bio->read_disk = rdisk; 1294 + 1295 + if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) 1296 + r1_bio->start_time = bio_start_io_acct(bio); 1296 1297 1297 1298 read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); 1298 1299 ··· 1466 1461 r1_bio->sectors = max_sectors; 1467 1462 } 1468 1463 1464 + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) 1465 + r1_bio->start_time = bio_start_io_acct(bio); 1469 1466 atomic_set(&r1_bio->remaining, 1); 1470 1467 atomic_set(&r1_bio->behind_remaining, 0); 1471 1468
+1
drivers/md/raid1.h
··· 158 158 sector_t sector; 159 159 int sectors; 160 160 unsigned long state; 161 + unsigned long start_time; 161 162 struct mddev *mddev; 162 163 /* 163 164 * original bio going to /dev/mdx
+6
drivers/md/raid10.c
··· 297 297 if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) 298 298 bio->bi_status = BLK_STS_IOERR; 299 299 300 + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) 301 + bio_end_io_acct(bio, r10_bio->start_time); 300 302 bio_endio(bio); 301 303 /* 302 304 * Wake up any possible resync thread that waits for the device ··· 1186 1184 } 1187 1185 slot = r10_bio->read_slot; 1188 1186 1187 + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) 1188 + r10_bio->start_time = bio_start_io_acct(bio); 1189 1189 read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); 1190 1190 1191 1191 r10_bio->devs[slot].bio = read_bio; ··· 1487 1483 r10_bio->master_bio = bio; 1488 1484 } 1489 1485 1486 + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) 1487 + r10_bio->start_time = bio_start_io_acct(bio); 1490 1488 atomic_set(&r10_bio->remaining, 1); 1491 1489 md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); 1492 1490
+1
drivers/md/raid10.h
··· 124 124 sector_t sector; /* virtual sector number */ 125 125 int sectors; 126 126 unsigned long state; 127 + unsigned long start_time; 127 128 struct mddev *mddev; 128 129 /* 129 130 * original bio going to /dev/mdx
+45 -18
drivers/md/raid5.c
··· 5362 5362 */ 5363 5363 static void raid5_align_endio(struct bio *bi) 5364 5364 { 5365 - struct bio* raid_bi = bi->bi_private; 5365 + struct md_io_acct *md_io_acct = bi->bi_private; 5366 + struct bio *raid_bi = md_io_acct->orig_bio; 5366 5367 struct mddev *mddev; 5367 5368 struct r5conf *conf; 5368 5369 struct md_rdev *rdev; 5369 5370 blk_status_t error = bi->bi_status; 5371 + unsigned long start_time = md_io_acct->start_time; 5370 5372 5371 5373 bio_put(bi); 5372 5374 ··· 5380 5378 rdev_dec_pending(rdev, conf->mddev); 5381 5379 5382 5380 if (!error) { 5381 + if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue)) 5382 + bio_end_io_acct(raid_bi, start_time); 5383 5383 bio_endio(raid_bi); 5384 5384 if (atomic_dec_and_test(&conf->active_aligned_reads)) 5385 5385 wake_up(&conf->wait_for_quiescent); ··· 5400 5396 struct md_rdev *rdev; 5401 5397 sector_t sector, end_sector, first_bad; 5402 5398 int bad_sectors, dd_idx; 5399 + struct md_io_acct *md_io_acct; 5400 + bool did_inc; 5403 5401 5404 5402 if (!in_chunk_boundary(mddev, raid_bio)) { 5405 5403 pr_debug("%s: non aligned\n", __func__); ··· 5431 5425 atomic_inc(&rdev->nr_pending); 5432 5426 rcu_read_unlock(); 5433 5427 5434 - align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set); 5435 - bio_set_dev(align_bio, rdev->bdev); 5436 - align_bio->bi_end_io = raid5_align_endio; 5437 - align_bio->bi_private = raid_bio; 5438 - align_bio->bi_iter.bi_sector = sector; 5439 - 5440 - raid_bio->bi_next = (void *)rdev; 5441 - 5442 - if (is_badblock(rdev, sector, bio_sectors(align_bio), &first_bad, 5428 + if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad, 5443 5429 &bad_sectors)) { 5444 - bio_put(align_bio); 5430 + bio_put(raid_bio); 5445 5431 rdev_dec_pending(rdev, mddev); 5446 5432 return 0; 5447 5433 } 5448 5434 5435 + align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set); 5436 + md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone); 5437 + raid_bio->bi_next = (void *)rdev; 5438 
+ if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue)) 5439 + md_io_acct->start_time = bio_start_io_acct(raid_bio); 5440 + md_io_acct->orig_bio = raid_bio; 5441 + 5442 + bio_set_dev(align_bio, rdev->bdev); 5443 + align_bio->bi_end_io = raid5_align_endio; 5444 + align_bio->bi_private = md_io_acct; 5445 + align_bio->bi_iter.bi_sector = sector; 5446 + 5449 5447 /* No reshape active, so we can trust rdev->data_offset */ 5450 5448 align_bio->bi_iter.bi_sector += rdev->data_offset; 5451 5449 5452 - spin_lock_irq(&conf->device_lock); 5453 - wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, 5454 - conf->device_lock); 5455 - atomic_inc(&conf->active_aligned_reads); 5456 - spin_unlock_irq(&conf->device_lock); 5450 + did_inc = false; 5451 + if (conf->quiesce == 0) { 5452 + atomic_inc(&conf->active_aligned_reads); 5453 + did_inc = true; 5454 + } 5455 + /* need a memory barrier to detect the race with raid5_quiesce() */ 5456 + if (!did_inc || smp_load_acquire(&conf->quiesce) != 0) { 5457 + /* quiesce is in progress, so we need to undo io activation and wait 5458 + * for it to finish 5459 + */ 5460 + if (did_inc && atomic_dec_and_test(&conf->active_aligned_reads)) 5461 + wake_up(&conf->wait_for_quiescent); 5462 + spin_lock_irq(&conf->device_lock); 5463 + wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, 5464 + conf->device_lock); 5465 + atomic_inc(&conf->active_aligned_reads); 5466 + spin_unlock_irq(&conf->device_lock); 5467 + } 5457 5468 5458 5469 if (mddev->gendisk) 5459 5470 trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk), ··· 5819 5796 last_sector = bio_end_sector(bi); 5820 5797 bi->bi_next = NULL; 5821 5798 5799 + md_account_bio(mddev, &bi); 5822 5800 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); 5823 5801 for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) { 5824 5802 int previous; ··· 6952 6928 &ppl_write_hint.attr, 6953 6929 NULL, 6954 6930 }; 6955 - static struct 
attribute_group raid5_attrs_group = { 6931 + static const struct attribute_group raid5_attrs_group = { 6956 6932 .name = NULL, 6957 6933 .attrs = raid5_attrs, 6958 6934 }; ··· 8358 8334 * active stripes can drain 8359 8335 */ 8360 8336 r5c_flush_cache(conf, INT_MAX); 8361 - conf->quiesce = 2; 8337 + /* need a memory barrier to make sure read_one_chunk() sees 8338 + * quiesce started and reverts to slow (locked) path. 8339 + */ 8340 + smp_store_release(&conf->quiesce, 2); 8362 8341 wait_event_cmd(conf->wait_for_quiescent, 8363 8342 atomic_read(&conf->active_stripes) == 0 && 8364 8343 atomic_read(&conf->active_aligned_reads) == 0,
+1 -1
drivers/nvme/host/Kconfig
··· 21 21 help 22 22 This option enables support for multipath access to NVMe 23 23 subsystems. If this option is enabled only a single 24 - /dev/nvmeXnY device will show up for each NVMe namespaces, 24 + /dev/nvmeXnY device will show up for each NVMe namespace, 25 25 even if it is accessible through multiple controllers. 26 26 27 27 config NVME_HWMON
+127 -65
drivers/nvme/host/core.c
··· 57 57 module_param(force_apst, bool, 0644); 58 58 MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off"); 59 59 60 + static unsigned long apst_primary_timeout_ms = 100; 61 + module_param(apst_primary_timeout_ms, ulong, 0644); 62 + MODULE_PARM_DESC(apst_primary_timeout_ms, 63 + "primary APST timeout in ms"); 64 + 65 + static unsigned long apst_secondary_timeout_ms = 2000; 66 + module_param(apst_secondary_timeout_ms, ulong, 0644); 67 + MODULE_PARM_DESC(apst_secondary_timeout_ms, 68 + "secondary APST timeout in ms"); 69 + 70 + static unsigned long apst_primary_latency_tol_us = 15000; 71 + module_param(apst_primary_latency_tol_us, ulong, 0644); 72 + MODULE_PARM_DESC(apst_primary_latency_tol_us, 73 + "primary APST latency tolerance in us"); 74 + 75 + static unsigned long apst_secondary_latency_tol_us = 100000; 76 + module_param(apst_secondary_latency_tol_us, ulong, 0644); 77 + MODULE_PARM_DESC(apst_secondary_latency_tol_us, 78 + "secondary APST latency tolerance in us"); 79 + 60 80 static bool streams; 61 81 module_param(streams, bool, 0644); 62 82 MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); ··· 721 701 722 702 static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) 723 703 { 724 - struct nvme_command c; 725 - 726 - memset(&c, 0, sizeof(c)); 704 + struct nvme_command c = { }; 727 705 728 706 c.directive.opcode = nvme_admin_directive_send; 729 707 c.directive.nsid = cpu_to_le32(NVME_NSID_ALL); ··· 746 728 static int nvme_get_stream_params(struct nvme_ctrl *ctrl, 747 729 struct streams_directive_params *s, u32 nsid) 748 730 { 749 - struct nvme_command c; 731 + struct nvme_command c = { }; 750 732 751 - memset(&c, 0, sizeof(c)); 752 733 memset(s, 0, sizeof(*s)); 753 734 754 735 c.directive.opcode = nvme_admin_directive_recv; ··· 1457 1440 unsigned int dword11, void *buffer, size_t buflen, u32 *result) 1458 1441 { 1459 1442 union nvme_result res = { 0 }; 1460 - struct nvme_command c; 1443 + 
struct nvme_command c = { }; 1461 1444 int ret; 1462 1445 1463 - memset(&c, 0, sizeof(c)); 1464 1446 c.features.opcode = op; 1465 1447 c.features.fid = cpu_to_le32(fid); 1466 1448 c.features.dword11 = cpu_to_le32(dword11); ··· 1538 1522 queue_work(nvme_wq, &ctrl->async_event_work); 1539 1523 } 1540 1524 1541 - /* 1542 - * Issue ioctl requests on the first available path. Note that unlike normal 1543 - * block layer requests we will not retry failed request on another controller. 1544 - */ 1545 - struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk, 1546 - struct nvme_ns_head **head, int *srcu_idx) 1547 - { 1548 - #ifdef CONFIG_NVME_MULTIPATH 1549 - if (disk->fops == &nvme_ns_head_ops) { 1550 - struct nvme_ns *ns; 1551 - 1552 - *head = disk->private_data; 1553 - *srcu_idx = srcu_read_lock(&(*head)->srcu); 1554 - ns = nvme_find_path(*head); 1555 - if (!ns) 1556 - srcu_read_unlock(&(*head)->srcu, *srcu_idx); 1557 - return ns; 1558 - } 1559 - #endif 1560 - *head = NULL; 1561 - *srcu_idx = -1; 1562 - return disk->private_data; 1563 - } 1564 - 1565 - void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) 1566 - { 1567 - if (head) 1568 - srcu_read_unlock(&head->srcu, idx); 1569 - } 1570 - 1571 1525 static int nvme_ns_open(struct nvme_ns *ns) 1572 1526 { 1573 1527 ··· 1587 1601 static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, 1588 1602 u32 max_integrity_segments) 1589 1603 { 1590 - struct blk_integrity integrity; 1604 + struct blk_integrity integrity = { }; 1591 1605 1592 - memset(&integrity, 0, sizeof(integrity)); 1593 1606 switch (pi_type) { 1594 1607 case NVME_NS_DPS_PI_TYPE3: 1595 1608 integrity.profile = &t10_pi_type3_crc; ··· 1933 1948 } 1934 1949 }; 1935 1950 1951 + static int nvme_send_ns_head_pr_command(struct block_device *bdev, 1952 + struct nvme_command *c, u8 data[16]) 1953 + { 1954 + struct nvme_ns_head *head = bdev->bd_disk->private_data; 1955 + int srcu_idx = srcu_read_lock(&head->srcu); 1956 + struct nvme_ns *ns = 
nvme_find_path(head); 1957 + int ret = -EWOULDBLOCK; 1958 + 1959 + if (ns) { 1960 + c->common.nsid = cpu_to_le32(ns->head->ns_id); 1961 + ret = nvme_submit_sync_cmd(ns->queue, c, data, 16); 1962 + } 1963 + srcu_read_unlock(&head->srcu, srcu_idx); 1964 + return ret; 1965 + } 1966 + 1967 + static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c, 1968 + u8 data[16]) 1969 + { 1970 + c->common.nsid = cpu_to_le32(ns->head->ns_id); 1971 + return nvme_submit_sync_cmd(ns->queue, c, data, 16); 1972 + } 1973 + 1936 1974 static int nvme_pr_command(struct block_device *bdev, u32 cdw10, 1937 1975 u64 key, u64 sa_key, u8 op) 1938 1976 { 1939 - struct nvme_ns_head *head = NULL; 1940 - struct nvme_ns *ns; 1941 - struct nvme_command c; 1942 - int srcu_idx, ret; 1977 + struct nvme_command c = { }; 1943 1978 u8 data[16] = { 0, }; 1944 - 1945 - ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); 1946 - if (unlikely(!ns)) 1947 - return -EWOULDBLOCK; 1948 1979 1949 1980 put_unaligned_le64(key, &data[0]); 1950 1981 put_unaligned_le64(sa_key, &data[8]); 1951 1982 1952 - memset(&c, 0, sizeof(c)); 1953 1983 c.common.opcode = op; 1954 - c.common.nsid = cpu_to_le32(ns->head->ns_id); 1955 1984 c.common.cdw10 = cpu_to_le32(cdw10); 1956 1985 1957 - ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); 1958 - nvme_put_ns_from_disk(head, srcu_idx); 1959 - return ret; 1986 + if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && 1987 + bdev->bd_disk->fops == &nvme_ns_head_ops) 1988 + return nvme_send_ns_head_pr_command(bdev, &c, data); 1989 + return nvme_send_ns_pr_command(bdev->bd_disk->private_data, &c, data); 1960 1990 } 1961 1991 1962 1992 static int nvme_pr_register(struct block_device *bdev, u64 old, ··· 2036 2036 bool send) 2037 2037 { 2038 2038 struct nvme_ctrl *ctrl = data; 2039 - struct nvme_command cmd; 2039 + struct nvme_command cmd = { }; 2040 2040 2041 - memset(&cmd, 0, sizeof(cmd)); 2042 2041 if (send) 2043 2042 cmd.common.opcode = nvme_admin_security_send; 2044 2043 
else ··· 2051 2052 } 2052 2053 EXPORT_SYMBOL_GPL(nvme_sec_submit); 2053 2054 #endif /* CONFIG_BLK_SED_OPAL */ 2055 + 2056 + #ifdef CONFIG_BLK_DEV_ZONED 2057 + static int nvme_report_zones(struct gendisk *disk, sector_t sector, 2058 + unsigned int nr_zones, report_zones_cb cb, void *data) 2059 + { 2060 + return nvme_ns_report_zones(disk->private_data, sector, nr_zones, cb, 2061 + data); 2062 + } 2063 + #else 2064 + #define nvme_report_zones NULL 2065 + #endif /* CONFIG_BLK_DEV_ZONED */ 2054 2066 2055 2067 static const struct block_device_operations nvme_bdev_ops = { 2056 2068 .owner = THIS_MODULE, ··· 2228 2218 } 2229 2219 2230 2220 /* 2221 + * The function checks whether the given total (exlat + enlat) latency of 2222 + * a power state allows the latter to be used as an APST transition target. 2223 + * It does so by comparing the latency to the primary and secondary latency 2224 + * tolerances defined by module params. If there's a match, the corresponding 2225 + * timeout value is returned and the matching tolerance index (1 or 2) is 2226 + * reported. 2227 + */ 2228 + static bool nvme_apst_get_transition_time(u64 total_latency, 2229 + u64 *transition_time, unsigned *last_index) 2230 + { 2231 + if (total_latency <= apst_primary_latency_tol_us) { 2232 + if (*last_index == 1) 2233 + return false; 2234 + *last_index = 1; 2235 + *transition_time = apst_primary_timeout_ms; 2236 + return true; 2237 + } 2238 + if (apst_secondary_timeout_ms && 2239 + total_latency <= apst_secondary_latency_tol_us) { 2240 + if (*last_index <= 2) 2241 + return false; 2242 + *last_index = 2; 2243 + *transition_time = apst_secondary_timeout_ms; 2244 + return true; 2245 + } 2246 + return false; 2247 + } 2248 + 2249 + /* 2231 2250 * APST (Autonomous Power State Transition) lets us program a table of power 2232 2251 * state transitions that the controller will perform automatically. 
2233 - * We configure it with a simple heuristic: we are willing to spend at most 2% 2234 - * of the time transitioning between power states. Therefore, when running in 2235 - * any given state, we will enter the next lower-power non-operational state 2236 - * after waiting 50 * (enlat + exlat) microseconds, as long as that state's exit 2237 - * latency is under the requested maximum latency. 2252 + * 2253 + * Depending on module params, one of the two supported techniques will be used: 2254 + * 2255 + * - If the parameters provide explicit timeouts and tolerances, they will be 2256 + * used to build a table with up to 2 non-operational states to transition to. 2257 + * The default parameter values were selected based on the values used by 2258 + * Microsoft's and Intel's NVMe drivers. Yet, since we don't implement dynamic 2259 + * regeneration of the APST table in the event of switching between external 2260 + * and battery power, the timeouts and tolerances reflect a compromise 2261 + * between values used by Microsoft for AC and battery scenarios. 2262 + * - If not, we'll configure the table with a simple heuristic: we are willing 2263 + * to spend at most 2% of the time transitioning between power states. 2264 + * Therefore, when running in any given state, we will enter the next 2265 + * lower-power non-operational state after waiting 50 * (enlat + exlat) 2266 + * microseconds, as long as that state's exit latency is under the requested 2267 + * maximum latency. 2238 2268 * 2239 2269 * We will not autonomously enter any non-operational state for which the total 2240 2270 * latency exceeds ps_max_latency_us. ··· 2290 2240 int max_ps = -1; 2291 2241 int state; 2292 2242 int ret; 2243 + unsigned last_lt_index = UINT_MAX; 2293 2244 2294 2245 /* 2295 2246 * If APST isn't supported or if we haven't been initialized yet, ··· 2349 2298 le32_to_cpu(ctrl->psd[state].entry_lat); 2350 2299 2351 2300 /* 2352 - * This state is good. 
Use it as the APST idle target for 2353 - * higher power states. 2301 + * This state is good. It can be used as the APST idle target 2302 + * for higher power states. 2354 2303 */ 2355 - transition_ms = total_latency_us + 19; 2356 - do_div(transition_ms, 20); 2357 - if (transition_ms > (1 << 24) - 1) 2358 - transition_ms = (1 << 24) - 1; 2304 + if (apst_primary_timeout_ms && apst_primary_latency_tol_us) { 2305 + if (!nvme_apst_get_transition_time(total_latency_us, 2306 + &transition_ms, &last_lt_index)) 2307 + continue; 2308 + } else { 2309 + transition_ms = total_latency_us + 19; 2310 + do_div(transition_ms, 20); 2311 + if (transition_ms > (1 << 24) - 1) 2312 + transition_ms = (1 << 24) - 1; 2313 + } 2359 2314 2360 2315 target = cpu_to_le64((state << 3) | (transition_ms << 8)); 2361 2316 if (max_ps == -1) ··· 4125 4068 4126 4069 ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s", 4127 4070 opts->host_traddr ?: "none"); 4071 + if (ret) 4072 + return ret; 4073 + 4074 + ret = add_uevent_var(env, "NVME_HOST_IFACE=%s", 4075 + opts->host_iface ?: "none"); 4128 4076 } 4129 4077 return ret; 4130 4078 }
+29 -29
drivers/nvme/host/fabrics.c
··· 112 112 if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) 113 113 len += scnprintf(buf + len, size - len, "%shost_traddr=%s", 114 114 (len) ? "," : "", ctrl->opts->host_traddr); 115 + if (ctrl->opts->mask & NVMF_OPT_HOST_IFACE) 116 + len += scnprintf(buf + len, size - len, "%shost_iface=%s", 117 + (len) ? "," : "", ctrl->opts->host_iface); 115 118 len += scnprintf(buf + len, size - len, "\n"); 116 119 117 120 return len; ··· 190 187 */ 191 188 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) 192 189 { 193 - struct nvme_command cmd; 190 + struct nvme_command cmd = { }; 194 191 union nvme_result res; 195 192 int ret; 196 193 197 - memset(&cmd, 0, sizeof(cmd)); 198 194 cmd.prop_get.opcode = nvme_fabrics_command; 199 195 cmd.prop_get.fctype = nvme_fabrics_type_property_get; 200 196 cmd.prop_get.attrib = 1; ··· 235 233 */ 236 234 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) 237 235 { 238 - struct nvme_command cmd; 236 + struct nvme_command cmd = { }; 239 237 int ret; 240 238 241 - memset(&cmd, 0, sizeof(cmd)); 242 239 cmd.prop_set.opcode = nvme_fabrics_command; 243 240 cmd.prop_set.fctype = nvme_fabrics_type_property_set; 244 241 cmd.prop_set.attrib = 0; ··· 255 254 EXPORT_SYMBOL_GPL(nvmf_reg_write32); 256 255 257 256 /** 258 - * nvmf_log_connect_error() - Error-parsing-diagnostic print 259 - * out function for connect() errors. 260 - * 261 - * @ctrl: the specific /dev/nvmeX device that had the error. 262 - * 263 - * @errval: Error code to be decoded in a more human-friendly 264 - * printout. 265 - * 266 - * @offset: For use with the NVMe error code NVME_SC_CONNECT_INVALID_PARAM. 267 - * 268 - * @cmd: This is the SQE portion of a submission capsule. 269 - * 270 - * @data: This is the "Data" portion of a submission capsule. 257 + * nvmf_log_connect_error() - Error-parsing-diagnostic print out function for 258 + * connect() errors. 259 + * @ctrl: The specific /dev/nvmeX device that had the error. 
260 + * @errval: Error code to be decoded in a more human-friendly 261 + * printout. 262 + * @offset: For use with the NVMe error code 263 + * NVME_SC_CONNECT_INVALID_PARAM. 264 + * @cmd: This is the SQE portion of a submission capsule. 265 + * @data: This is the "Data" portion of a submission capsule. 271 266 */ 272 267 static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, 273 268 int errval, int offset, struct nvme_command *cmd, 274 269 struct nvmf_connect_data *data) 275 270 { 276 - int err_sctype = errval & (~NVME_SC_DNR); 271 + int err_sctype = errval & ~NVME_SC_DNR; 277 272 278 273 switch (err_sctype) { 279 - 280 274 case (NVME_SC_CONNECT_INVALID_PARAM): 281 275 if (offset >> 16) { 282 276 char *inv_data = "Connect Invalid Data Parameter"; ··· 314 318 } 315 319 } 316 320 break; 317 - 318 321 case NVME_SC_CONNECT_INVALID_HOST: 319 322 dev_err(ctrl->device, 320 323 "Connect for subsystem %s is not allowed, hostnqn: %s\n", 321 324 data->subsysnqn, data->hostnqn); 322 325 break; 323 - 324 326 case NVME_SC_CONNECT_CTRL_BUSY: 325 327 dev_err(ctrl->device, 326 328 "Connect command failed: controller is busy or not available\n"); 327 329 break; 328 - 329 330 case NVME_SC_CONNECT_FORMAT: 330 331 dev_err(ctrl->device, 331 332 "Connect incompatible format: %d", 332 333 cmd->connect.recfmt); 333 334 break; 334 - 335 335 case NVME_SC_HOST_PATH_ERROR: 336 336 dev_err(ctrl->device, 337 337 "Connect command failed: host path error\n"); 338 338 break; 339 - 340 339 default: 341 340 dev_err(ctrl->device, 342 341 "Connect command failed, error wo/DNR bit: %d\n", 343 342 err_sctype); 344 343 break; 345 - } /* switch (err_sctype) */ 344 + } 346 345 } 347 346 348 347 /** ··· 362 371 */ 363 372 int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) 364 373 { 365 - struct nvme_command cmd; 374 + struct nvme_command cmd = { }; 366 375 union nvme_result res; 367 376 struct nvmf_connect_data *data; 368 377 int ret; 369 378 370 - memset(&cmd, 0, sizeof(cmd)); 371 379 
cmd.connect.opcode = nvme_fabrics_command; 372 380 cmd.connect.fctype = nvme_fabrics_type_connect; 373 381 cmd.connect.qid = 0; ··· 429 439 */ 430 440 int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll) 431 441 { 432 - struct nvme_command cmd; 442 + struct nvme_command cmd = { }; 433 443 struct nvmf_connect_data *data; 434 444 union nvme_result res; 435 445 int ret; 436 446 437 - memset(&cmd, 0, sizeof(cmd)); 438 447 cmd.connect.opcode = nvme_fabrics_command; 439 448 cmd.connect.fctype = nvme_fabrics_type_connect; 440 449 cmd.connect.qid = cpu_to_le16(qid); ··· 539 550 { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, 540 551 { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, 541 552 { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, 553 + { NVMF_OPT_HOST_IFACE, "host_iface=%s" }, 542 554 { NVMF_OPT_HOST_ID, "hostid=%s" }, 543 555 { NVMF_OPT_DUP_CONNECT, "duplicate_connect" }, 544 556 { NVMF_OPT_DISABLE_SQFLOW, "disable_sqflow" }, ··· 749 759 kfree(opts->host_traddr); 750 760 opts->host_traddr = p; 751 761 break; 762 + case NVMF_OPT_HOST_IFACE: 763 + p = match_strdup(args); 764 + if (!p) { 765 + ret = -ENOMEM; 766 + goto out; 767 + } 768 + kfree(opts->host_iface); 769 + opts->host_iface = p; 770 + break; 752 771 case NVMF_OPT_HOST_ID: 753 772 p = match_strdup(args); 754 773 if (!p) { ··· 942 943 kfree(opts->trsvcid); 943 944 kfree(opts->subsysnqn); 944 945 kfree(opts->host_traddr); 946 + kfree(opts->host_iface); 945 947 kfree(opts); 946 948 } 947 949 EXPORT_SYMBOL_GPL(nvmf_free_options);
+5 -1
drivers/nvme/host/fabrics.h
··· 66 66 NVMF_OPT_NR_POLL_QUEUES = 1 << 18, 67 67 NVMF_OPT_TOS = 1 << 19, 68 68 NVMF_OPT_FAIL_FAST_TMO = 1 << 20, 69 + NVMF_OPT_HOST_IFACE = 1 << 21, 69 70 }; 70 71 71 72 /** ··· 84 83 * @trsvcid: The transport-specific TRSVCID field for a port on the 85 84 * subsystem which is adding a controller. 86 85 * @host_traddr: A transport-specific field identifying the NVME host port 87 - * to use for the connection to the controller. 86 + * to use for the connection to the controller. 87 + * @host_iface: A transport-specific field identifying the NVME host 88 + * interface to use for the connection to the controller. 88 89 * @queue_size: Number of IO queue elements. 89 90 * @nr_io_queues: Number of controller IO queues that will be established. 90 91 * @reconnect_delay: Time between two consecutive reconnect attempts. ··· 111 108 char *traddr; 112 109 char *trsvcid; 113 110 char *host_traddr; 111 + char *host_iface; 114 112 size_t queue_size; 115 113 unsigned int nr_io_queues; 116 114 unsigned int reconnect_delay;
+1 -1
drivers/nvme/host/fc.c
··· 3112 3112 } 3113 3113 3114 3114 /* FC-NVME supports normal SGL Data Block Descriptors */ 3115 - if (!(ctrl->ctrl.sgls & ((1 << 0) | (1 << 1)))) { 3115 + if (!nvme_ctrl_sgl_supported(&ctrl->ctrl)) { 3116 3116 dev_err(ctrl->ctrl.device, 3117 3117 "Mandatory sgls are not supported!\n"); 3118 3118 ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+33 -28
drivers/nvme/host/ioctl.c
··· 177 177 metadata, meta_len, lower_32_bits(io.slba), NULL, 0); 178 178 } 179 179 180 + static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 181 + struct nvme_ns *ns, __u32 nsid) 182 + { 183 + if (ns && nsid != ns->head->ns_id) { 184 + dev_err(ctrl->device, 185 + "%s: nsid (%u) in cmd does not match nsid (%u)" 186 + "of namespace\n", 187 + current->comm, nsid, ns->head->ns_id); 188 + return false; 189 + } 190 + 191 + return true; 192 + } 193 + 180 194 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 181 195 struct nvme_passthru_cmd __user *ucmd) 182 196 { ··· 206 192 return -EFAULT; 207 193 if (cmd.flags) 208 194 return -EINVAL; 209 - if (ns && cmd.nsid != ns->head->ns_id) { 210 - dev_err(ctrl->device, 211 - "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", 212 - current->comm, cmd.nsid, ns->head->ns_id); 195 + if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 213 196 return -EINVAL; 214 - } 215 197 216 198 memset(&c, 0, sizeof(c)); 217 199 c.common.opcode = cmd.opcode; ··· 252 242 return -EFAULT; 253 243 if (cmd.flags) 254 244 return -EINVAL; 255 - if (ns && cmd.nsid != ns->head->ns_id) { 256 - dev_err(ctrl->device, 257 - "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", 258 - current->comm, cmd.nsid, ns->head->ns_id); 245 + if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 259 246 return -EINVAL; 260 - } 261 247 262 248 memset(&c, 0, sizeof(c)); 263 249 c.common.opcode = cmd.opcode; ··· 378 372 #ifdef CONFIG_NVME_MULTIPATH 379 373 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 380 374 void __user *argp, struct nvme_ns_head *head, int srcu_idx) 375 + __releases(&head->srcu) 381 376 { 382 377 struct nvme_ctrl *ctrl = ns->ctrl; 383 378 int ret; 384 379 385 380 nvme_get_ctrl(ns->ctrl); 386 - nvme_put_ns_from_disk(head, srcu_idx); 381 + srcu_read_unlock(&head->srcu, srcu_idx); 387 382 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); 388 383 389 384 nvme_put_ctrl(ctrl); ··· 394 
387 int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, 395 388 unsigned int cmd, unsigned long arg) 396 389 { 397 - struct nvme_ns_head *head = NULL; 390 + struct nvme_ns_head *head = bdev->bd_disk->private_data; 398 391 void __user *argp = (void __user *)arg; 399 392 struct nvme_ns *ns; 400 - int srcu_idx, ret; 393 + int srcu_idx, ret = -EWOULDBLOCK; 401 394 402 - ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); 403 - if (unlikely(!ns)) 404 - return -EWOULDBLOCK; 395 + srcu_idx = srcu_read_lock(&head->srcu); 396 + ns = nvme_find_path(head); 397 + if (!ns) 398 + goto out_unlock; 405 399 406 400 /* 407 401 * Handle ioctls that apply to the controller instead of the namespace ··· 410 402 * deadlock when deleting namespaces using the passthrough interface. 411 403 */ 412 404 if (is_ctrl_ioctl(cmd)) 413 - ret = nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); 414 - else { 415 - ret = nvme_ns_ioctl(ns, cmd, argp); 416 - nvme_put_ns_from_disk(head, srcu_idx); 417 - } 405 + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); 418 406 407 + ret = nvme_ns_ioctl(ns, cmd, argp); 408 + out_unlock: 409 + srcu_read_unlock(&head->srcu, srcu_idx); 419 410 return ret; 420 411 } 421 412 ··· 426 419 container_of(cdev, struct nvme_ns_head, cdev); 427 420 void __user *argp = (void __user *)arg; 428 421 struct nvme_ns *ns; 429 - int srcu_idx, ret; 422 + int srcu_idx, ret = -EWOULDBLOCK; 430 423 431 424 srcu_idx = srcu_read_lock(&head->srcu); 432 425 ns = nvme_find_path(head); 433 - if (!ns) { 434 - srcu_read_unlock(&head->srcu, srcu_idx); 435 - return -EWOULDBLOCK; 436 - } 426 + if (!ns) 427 + goto out_unlock; 437 428 438 429 if (is_ctrl_ioctl(cmd)) 439 430 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); 440 431 441 432 ret = nvme_ns_ioctl(ns, cmd, argp); 442 - nvme_put_ns_from_disk(head, srcu_idx); 443 - 433 + out_unlock: 434 + srcu_read_unlock(&head->srcu, srcu_idx); 444 435 return ret; 445 436 } 446 437 #endif /* 
CONFIG_NVME_MULTIPATH */
+27 -6
drivers/nvme/host/multipath.c
··· 349 349 nvme_put_ns_head(disk->private_data); 350 350 } 351 351 352 + #ifdef CONFIG_BLK_DEV_ZONED 353 + static int nvme_ns_head_report_zones(struct gendisk *disk, sector_t sector, 354 + unsigned int nr_zones, report_zones_cb cb, void *data) 355 + { 356 + struct nvme_ns_head *head = disk->private_data; 357 + struct nvme_ns *ns; 358 + int srcu_idx, ret = -EWOULDBLOCK; 359 + 360 + srcu_idx = srcu_read_lock(&head->srcu); 361 + ns = nvme_find_path(head); 362 + if (ns) 363 + ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data); 364 + srcu_read_unlock(&head->srcu, srcu_idx); 365 + return ret; 366 + } 367 + #else 368 + #define nvme_ns_head_report_zones NULL 369 + #endif /* CONFIG_BLK_DEV_ZONED */ 370 + 352 371 const struct block_device_operations nvme_ns_head_ops = { 353 372 .owner = THIS_MODULE, 354 373 .submit_bio = nvme_ns_head_submit_bio, ··· 375 356 .release = nvme_ns_head_release, 376 357 .ioctl = nvme_ns_head_ioctl, 377 358 .getgeo = nvme_getgeo, 378 - .report_zones = nvme_report_zones, 359 + .report_zones = nvme_ns_head_report_zones, 379 360 .pr_ops = &nvme_pr_ops, 380 361 }; 381 362 ··· 435 416 next = bio->bi_next; 436 417 bio->bi_next = NULL; 437 418 438 - /* 439 - * Reset disk to the mpath node and resubmit to select a new 440 - * path. 441 - */ 442 - bio_set_dev(bio, head->disk->part0); 443 419 submit_bio_noacct(bio); 444 420 } 445 421 } ··· 792 778 if (!multipath || !ctrl->subsys || 793 779 !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) 794 780 return 0; 781 + 782 + if (!ctrl->max_namespaces || 783 + ctrl->max_namespaces > le32_to_cpu(id->nn)) { 784 + dev_err(ctrl->device, 785 + "Invalid MNAN value %u\n", ctrl->max_namespaces); 786 + return -EINVAL; 787 + } 795 788 796 789 ctrl->anacap = id->anacap; 797 790 ctrl->anatt = id->anatt;
+8 -9
drivers/nvme/host/nvme.h
··· 674 674 void nvme_queue_scan(struct nvme_ctrl *ctrl); 675 675 int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, 676 676 void *log, size_t size, u64 offset); 677 - struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk, 678 - struct nvme_ns_head **head, int *srcu_idx); 679 - void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx); 680 677 bool nvme_tryget_ns_head(struct nvme_ns_head *head); 681 678 void nvme_put_ns_head(struct nvme_ns_head *head); 682 679 int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, ··· 694 697 extern const struct pr_ops nvme_pr_ops; 695 698 extern const struct block_device_operations nvme_ns_head_ops; 696 699 700 + struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); 697 701 #ifdef CONFIG_NVME_MULTIPATH 698 702 static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) 699 703 { ··· 716 718 void nvme_mpath_stop(struct nvme_ctrl *ctrl); 717 719 bool nvme_mpath_clear_current_path(struct nvme_ns *ns); 718 720 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); 719 - struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); 720 721 721 722 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) 722 723 { ··· 807 810 #endif /* CONFIG_NVME_MULTIPATH */ 808 811 809 812 int nvme_revalidate_zones(struct nvme_ns *ns); 813 + int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, 814 + unsigned int nr_zones, report_zones_cb cb, void *data); 810 815 #ifdef CONFIG_BLK_DEV_ZONED 811 816 int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf); 812 - int nvme_report_zones(struct gendisk *disk, sector_t sector, 813 - unsigned int nr_zones, report_zones_cb cb, void *data); 814 - 815 817 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, 816 818 struct nvme_command *cmnd, 817 819 enum nvme_zone_mgmt_action action); 818 820 #else 819 - #define nvme_report_zones NULL 820 - 821 821 static inline blk_status_t nvme_setup_zone_mgmt_send(struct 
nvme_ns *ns, 822 822 struct request *req, struct nvme_command *cmnd, 823 823 enum nvme_zone_mgmt_action action) ··· 868 874 { 869 875 } 870 876 #endif 877 + 878 + static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl) 879 + { 880 + return ctrl->sgls & ((1 << 0) | (1 << 1)); 881 + } 871 882 872 883 u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 873 884 u8 opcode);
+12 -70
drivers/nvme/host/pci.c
··· 307 307 308 308 static void nvme_dbbuf_set(struct nvme_dev *dev) 309 309 { 310 - struct nvme_command c; 310 + struct nvme_command c = { }; 311 311 unsigned int i; 312 312 313 313 if (!dev->dbbuf_dbs) 314 314 return; 315 315 316 - memset(&c, 0, sizeof(c)); 317 316 c.dbbuf.opcode = nvme_admin_dbbuf; 318 317 c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr); 319 318 c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); ··· 535 536 536 537 avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); 537 538 538 - if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) 539 + if (!nvme_ctrl_sgl_supported(&dev->ctrl)) 539 540 return false; 540 541 if (!iod->nvmeq->qid) 541 542 return false; ··· 558 559 dma_pool_free(dev->prp_page_pool, prp_list, dma_addr); 559 560 dma_addr = next_dma_addr; 560 561 } 561 - 562 562 } 563 563 564 564 static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) ··· 574 576 dma_pool_free(dev->prp_page_pool, sg_list, dma_addr); 575 577 dma_addr = next_dma_addr; 576 578 } 577 - 578 579 } 579 580 580 581 static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req) ··· 852 855 &cmnd->rw, &bv); 853 856 854 857 if (iod->nvmeq->qid && sgl_threshold && 855 - dev->ctrl.sgls & ((1 << 0) | (1 << 1))) 858 + nvme_ctrl_sgl_supported(&dev->ctrl)) 856 859 return nvme_setup_sgl_simple(dev, req, 857 860 &cmnd->rw, &bv); 858 861 } ··· 1029 1032 1030 1033 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) 1031 1034 { 1032 - u16 tmp = nvmeq->cq_head + 1; 1035 + u32 tmp = nvmeq->cq_head + 1; 1033 1036 1034 1037 if (tmp == nvmeq->q_depth) { 1035 1038 nvmeq->cq_head = 0; ··· 1111 1114 { 1112 1115 struct nvme_dev *dev = to_nvme_dev(ctrl); 1113 1116 struct nvme_queue *nvmeq = &dev->queues[0]; 1114 - struct nvme_command c; 1117 + struct nvme_command c = { }; 1115 1118 1116 - memset(&c, 0, sizeof(c)); 1117 1119 c.common.opcode = nvme_admin_async_event; 1118 1120 c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; 1119 1121 nvme_submit_cmd(nvmeq, &c, 
true); ··· 1120 1124 1121 1125 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) 1122 1126 { 1123 - struct nvme_command c; 1127 + struct nvme_command c = { }; 1124 1128 1125 - memset(&c, 0, sizeof(c)); 1126 1129 c.delete_queue.opcode = opcode; 1127 1130 c.delete_queue.qid = cpu_to_le16(id); 1128 1131 ··· 1131 1136 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, 1132 1137 struct nvme_queue *nvmeq, s16 vector) 1133 1138 { 1134 - struct nvme_command c; 1139 + struct nvme_command c = { }; 1135 1140 int flags = NVME_QUEUE_PHYS_CONTIG; 1136 1141 1137 1142 if (!test_bit(NVMEQ_POLLED, &nvmeq->flags)) ··· 1141 1146 * Note: we (ab)use the fact that the prp fields survive if no data 1142 1147 * is attached to the request. 1143 1148 */ 1144 - memset(&c, 0, sizeof(c)); 1145 1149 c.create_cq.opcode = nvme_admin_create_cq; 1146 1150 c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr); 1147 1151 c.create_cq.cqid = cpu_to_le16(qid); ··· 1155 1161 struct nvme_queue *nvmeq) 1156 1162 { 1157 1163 struct nvme_ctrl *ctrl = &dev->ctrl; 1158 - struct nvme_command c; 1164 + struct nvme_command c = { }; 1159 1165 int flags = NVME_QUEUE_PHYS_CONTIG; 1160 1166 1161 1167 /* ··· 1170 1176 * Note: we (ab)use the fact that the prp fields survive if no data 1171 1177 * is attached to the request. 
1172 1178 */ 1173 - memset(&c, 0, sizeof(c)); 1174 1179 c.create_sq.opcode = nvme_admin_create_sq; 1175 1180 c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr); 1176 1181 c.create_sq.sqid = cpu_to_le16(qid); ··· 1250 1257 struct nvme_queue *nvmeq = iod->nvmeq; 1251 1258 struct nvme_dev *dev = nvmeq->dev; 1252 1259 struct request *abort_req; 1253 - struct nvme_command cmd; 1260 + struct nvme_command cmd = { }; 1254 1261 u32 csts = readl(dev->bar + NVME_REG_CSTS); 1255 1262 1256 1263 /* If PCI error recovery process is happening, we cannot reset or ··· 1330 1337 } 1331 1338 iod->aborted = 1; 1332 1339 1333 - memset(&cmd, 0, sizeof(cmd)); 1334 1340 cmd.abort.opcode = nvme_admin_abort_cmd; 1335 1341 cmd.abort.cid = req->tag; 1336 1342 cmd.abort.sqid = cpu_to_le16(nvmeq->qid); ··· 1880 1888 { 1881 1889 u32 host_mem_size = dev->host_mem_size >> NVME_CTRL_PAGE_SHIFT; 1882 1890 u64 dma_addr = dev->host_mem_descs_dma; 1883 - struct nvme_command c; 1891 + struct nvme_command c = { }; 1884 1892 int ret; 1885 1893 1886 - memset(&c, 0, sizeof(c)); 1887 1894 c.features.opcode = nvme_admin_set_features; 1888 1895 c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF); 1889 1896 c.features.dword11 = cpu_to_le32(bits); ··· 2256 2265 { 2257 2266 struct request_queue *q = nvmeq->dev->ctrl.admin_q; 2258 2267 struct request *req; 2259 - struct nvme_command cmd; 2268 + struct nvme_command cmd = { }; 2260 2269 2261 - memset(&cmd, 0, sizeof(cmd)); 2262 2270 cmd.delete_queue.opcode = opcode; 2263 2271 cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid); 2264 2272 ··· 2818 2828 return 0; 2819 2829 } 2820 2830 2821 - #ifdef CONFIG_ACPI 2822 - static bool nvme_acpi_storage_d3(struct pci_dev *dev) 2823 - { 2824 - struct acpi_device *adev; 2825 - struct pci_dev *root; 2826 - acpi_handle handle; 2827 - acpi_status status; 2828 - u8 val; 2829 - 2830 - /* 2831 - * Look for _DSD property specifying that the storage device on the port 2832 - * must use D3 to support deep platform power savings during 2833 
- * suspend-to-idle. 2834 - */ 2835 - root = pcie_find_root_port(dev); 2836 - if (!root) 2837 - return false; 2838 - 2839 - adev = ACPI_COMPANION(&root->dev); 2840 - if (!adev) 2841 - return false; 2842 - 2843 - /* 2844 - * The property is defined in the PXSX device for South complex ports 2845 - * and in the PEGP device for North complex ports. 2846 - */ 2847 - status = acpi_get_handle(adev->handle, "PXSX", &handle); 2848 - if (ACPI_FAILURE(status)) { 2849 - status = acpi_get_handle(adev->handle, "PEGP", &handle); 2850 - if (ACPI_FAILURE(status)) 2851 - return false; 2852 - } 2853 - 2854 - if (acpi_bus_get_device(handle, &adev)) 2855 - return false; 2856 - 2857 - if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable", 2858 - &val)) 2859 - return false; 2860 - return val == 1; 2861 - } 2862 - #else 2863 - static inline bool nvme_acpi_storage_d3(struct pci_dev *dev) 2864 - { 2865 - return false; 2866 - } 2867 - #endif /* CONFIG_ACPI */ 2868 - 2869 2831 static void nvme_async_probe(void *data, async_cookie_t cookie) 2870 2832 { 2871 2833 struct nvme_dev *dev = data; ··· 2867 2925 2868 2926 quirks |= check_vendor_combination_bug(pdev); 2869 2927 2870 - if (!noacpi && nvme_acpi_storage_d3(pdev)) { 2928 + if (!noacpi && acpi_storage_d3(&pdev->dev)) { 2871 2929 /* 2872 2930 * Some systems use a bios work around to ask for D3 on 2873 2931 * platforms that support kernel managed suspend.
+1 -1
drivers/nvme/host/rdma.c
··· 1088 1088 1089 1089 static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) 1090 1090 { 1091 - int ret = -EINVAL; 1091 + int ret; 1092 1092 bool changed; 1093 1093 1094 1094 ret = nvme_rdma_configure_admin_queue(ctrl, new);
+29 -2
drivers/nvme/host/tcp.c
··· 123 123 struct blk_mq_tag_set admin_tag_set; 124 124 struct sockaddr_storage addr; 125 125 struct sockaddr_storage src_addr; 126 + struct net_device *ndev; 126 127 struct nvme_ctrl ctrl; 127 128 128 129 struct work_struct err_work; ··· 1456 1455 } 1457 1456 } 1458 1457 1458 + if (nctrl->opts->mask & NVMF_OPT_HOST_IFACE) { 1459 + char *iface = nctrl->opts->host_iface; 1460 + sockptr_t optval = KERNEL_SOCKPTR(iface); 1461 + 1462 + ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE, 1463 + optval, strlen(iface)); 1464 + if (ret) { 1465 + dev_err(nctrl->device, 1466 + "failed to bind to interface %s queue %d err %d\n", 1467 + iface, qid, ret); 1468 + goto err_sock; 1469 + } 1470 + } 1471 + 1459 1472 queue->hdr_digest = nctrl->opts->hdr_digest; 1460 1473 queue->data_digest = nctrl->opts->data_digest; 1461 1474 if (queue->hdr_digest || queue->data_digest) { ··· 1988 1973 return ret; 1989 1974 1990 1975 if (ctrl->icdoff) { 1976 + ret = -EOPNOTSUPP; 1991 1977 dev_err(ctrl->device, "icdoff is not supported!\n"); 1992 1978 goto destroy_admin; 1993 1979 } 1994 1980 1995 - if (!(ctrl->sgls & ((1 << 0) | (1 << 1)))) { 1981 + if (!nvme_ctrl_sgl_supported(ctrl)) { 1982 + ret = -EOPNOTSUPP; 1996 1983 dev_err(ctrl->device, "Mandatory sgls are not supported!\n"); 1997 1984 goto destroy_admin; 1998 1985 } ··· 2532 2515 } 2533 2516 } 2534 2517 2518 + if (opts->mask & NVMF_OPT_HOST_IFACE) { 2519 + ctrl->ndev = dev_get_by_name(&init_net, opts->host_iface); 2520 + if (!ctrl->ndev) { 2521 + pr_err("invalid interface passed: %s\n", 2522 + opts->host_iface); 2523 + ret = -ENODEV; 2524 + goto out_free_ctrl; 2525 + } 2526 + } 2527 + 2535 2528 if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) { 2536 2529 ret = -EALREADY; 2537 2530 goto out_free_ctrl; ··· 2598 2571 NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | 2599 2572 NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | 2600 2573 NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | 2601 - NVMF_OPT_TOS, 2574 + 
NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE, 2602 2575 .create_ctrl = nvme_tcp_create_ctrl, 2603 2576 }; 2604 2577
+5 -22
drivers/nvme/host/zns.c
··· 171 171 return cb(&zone, idx, data); 172 172 } 173 173 174 - static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, 175 - unsigned int nr_zones, report_zones_cb cb, void *data) 174 + int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, 175 + unsigned int nr_zones, report_zones_cb cb, void *data) 176 176 { 177 177 struct nvme_zone_report *report; 178 178 struct nvme_command c = { }; 179 179 int ret, zone_idx = 0; 180 180 unsigned int nz, i; 181 181 size_t buflen; 182 + 183 + if (ns->head->ids.csi != NVME_CSI_ZNS) 184 + return -EINVAL; 182 185 183 186 report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen); 184 187 if (!report) ··· 227 224 ret = -EINVAL; 228 225 out_free: 229 226 kvfree(report); 230 - return ret; 231 - } 232 - 233 - int nvme_report_zones(struct gendisk *disk, sector_t sector, 234 - unsigned int nr_zones, report_zones_cb cb, void *data) 235 - { 236 - struct nvme_ns_head *head = NULL; 237 - struct nvme_ns *ns; 238 - int srcu_idx, ret; 239 - 240 - ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx); 241 - if (unlikely(!ns)) 242 - return -EWOULDBLOCK; 243 - 244 - if (ns->head->ids.csi == NVME_CSI_ZNS) 245 - ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data); 246 - else 247 - ret = -EINVAL; 248 - nvme_put_ns_from_disk(head, srcu_idx); 249 - 250 227 return ret; 251 228 } 252 229
+1
drivers/nvme/target/Makefile
··· 12 12 nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \ 13 13 discovery.o io-cmd-file.o io-cmd-bdev.o 14 14 nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o 15 + nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o 15 16 nvme-loop-y += loop.o 16 17 nvmet-rdma-y += rdma.o 17 18 nvmet-fc-y += fc.o
+110 -45
drivers/nvme/target/admin-cmd.c
··· 162 162 nvmet_req_complete(req, status); 163 163 } 164 164 165 - static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) 165 + static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) 166 166 { 167 - u16 status = NVME_SC_INTERNAL; 168 - struct nvme_effects_log *log; 169 - 170 - log = kzalloc(sizeof(*log), GFP_KERNEL); 171 - if (!log) 172 - goto out; 173 - 174 167 log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0); 175 168 log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0); 176 169 log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0); ··· 177 184 log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0); 178 185 log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0); 179 186 log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0); 187 + } 188 + 189 + static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log) 190 + { 191 + log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0); 192 + log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0); 193 + log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0); 194 + } 195 + 196 + static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) 197 + { 198 + struct nvme_effects_log *log; 199 + u16 status = NVME_SC_SUCCESS; 200 + 201 + log = kzalloc(sizeof(*log), GFP_KERNEL); 202 + if (!log) { 203 + status = NVME_SC_INTERNAL; 204 + goto out; 205 + } 206 + 207 + switch (req->cmd->get_log_page.csi) { 208 + case NVME_CSI_NVM: 209 + nvmet_get_cmd_effects_nvm(log); 210 + break; 211 + case NVME_CSI_ZNS: 212 + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { 213 + status = NVME_SC_INVALID_IO_CMD_SET; 214 + goto free; 215 + } 216 + nvmet_get_cmd_effects_nvm(log); 217 + nvmet_get_cmd_effects_zns(log); 218 + break; 219 + default: 220 + status = NVME_SC_INVALID_LOG_PAGE; 221 + goto free; 222 + } 180 223 181 224 status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); 182 - 225 + free: 183 226 kfree(log); 184 227 out: 185 228 nvmet_req_complete(req, status); ··· 342 313 nvmet_req_complete(req, NVME_SC_INVALID_FIELD 
| NVME_SC_DNR); 343 314 } 344 315 345 - static u16 nvmet_set_model_number(struct nvmet_subsys *subsys) 346 - { 347 - u16 status = 0; 348 - 349 - mutex_lock(&subsys->lock); 350 - if (!subsys->model_number) { 351 - subsys->model_number = 352 - kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL); 353 - if (!subsys->model_number) 354 - status = NVME_SC_INTERNAL; 355 - } 356 - mutex_unlock(&subsys->lock); 357 - 358 - return status; 359 - } 360 - 361 316 static void nvmet_execute_identify_ctrl(struct nvmet_req *req) 362 317 { 363 318 struct nvmet_ctrl *ctrl = req->sq->ctrl; ··· 350 337 u32 cmd_capsule_size; 351 338 u16 status = 0; 352 339 353 - /* 354 - * If there is no model number yet, set it now. It will then remain 355 - * stable for the life time of the subsystem. 356 - */ 357 - if (!subsys->model_number) { 358 - status = nvmet_set_model_number(subsys); 359 - if (status) 360 - goto out; 340 + if (!subsys->subsys_discovered) { 341 + mutex_lock(&subsys->lock); 342 + subsys->subsys_discovered = true; 343 + mutex_unlock(&subsys->lock); 361 344 } 362 345 363 346 id = kzalloc(sizeof(*id), GFP_KERNEL); ··· 366 357 id->vid = 0; 367 358 id->ssvid = 0; 368 359 369 - memset(id->sn, ' ', sizeof(id->sn)); 370 - bin2hex(id->sn, &ctrl->subsys->serial, 371 - min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); 360 + memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE); 372 361 memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number, 373 362 strlen(subsys->model_number), ' '); 374 363 memcpy_and_pad(id->fr, sizeof(id->fr), ··· 422 415 /* no enforcement soft-limit for maxcmd - pick arbitrary high value */ 423 416 id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); 424 417 425 - id->nn = cpu_to_le32(ctrl->subsys->max_nsid); 418 + id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES); 426 419 id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES); 427 420 id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM | 428 421 NVME_CTRL_ONCS_WRITE_ZEROES); ··· 642 635 goto out; 643 636 } 644 637 638 + status = 
nvmet_copy_ns_identifier(req, NVME_NIDT_CSI, 639 + NVME_NIDT_CSI_LEN, 640 + &req->ns->csi, &off); 641 + if (status) 642 + goto out; 643 + 645 644 if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off, 646 645 off) != NVME_IDENTIFY_DATA_SIZE - off) 647 646 status = NVME_SC_INTERNAL | NVME_SC_DNR; 648 647 649 648 out: 650 649 nvmet_req_complete(req, status); 650 + } 651 + 652 + static bool nvmet_handle_identify_desclist(struct nvmet_req *req) 653 + { 654 + switch (req->cmd->identify.csi) { 655 + case NVME_CSI_NVM: 656 + nvmet_execute_identify_desclist(req); 657 + return true; 658 + case NVME_CSI_ZNS: 659 + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { 660 + nvmet_execute_identify_desclist(req); 661 + return true; 662 + } 663 + return false; 664 + default: 665 + return false; 666 + } 651 667 } 652 668 653 669 static void nvmet_execute_identify(struct nvmet_req *req) ··· 680 650 681 651 switch (req->cmd->identify.cns) { 682 652 case NVME_ID_CNS_NS: 683 - return nvmet_execute_identify_ns(req); 653 + switch (req->cmd->identify.csi) { 654 + case NVME_CSI_NVM: 655 + return nvmet_execute_identify_ns(req); 656 + default: 657 + break; 658 + } 659 + break; 660 + case NVME_ID_CNS_CS_NS: 661 + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { 662 + switch (req->cmd->identify.csi) { 663 + case NVME_CSI_ZNS: 664 + return nvmet_execute_identify_cns_cs_ns(req); 665 + default: 666 + break; 667 + } 668 + } 669 + break; 684 670 case NVME_ID_CNS_CTRL: 685 - return nvmet_execute_identify_ctrl(req); 671 + switch (req->cmd->identify.csi) { 672 + case NVME_CSI_NVM: 673 + return nvmet_execute_identify_ctrl(req); 674 + } 675 + break; 676 + case NVME_ID_CNS_CS_CTRL: 677 + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { 678 + switch (req->cmd->identify.csi) { 679 + case NVME_CSI_ZNS: 680 + return nvmet_execute_identify_cns_cs_ctrl(req); 681 + default: 682 + break; 683 + } 684 + } 685 + break; 686 686 case NVME_ID_CNS_NS_ACTIVE_LIST: 687 - return nvmet_execute_identify_nslist(req); 687 + switch 
(req->cmd->identify.csi) { 688 + case NVME_CSI_NVM: 689 + return nvmet_execute_identify_nslist(req); 690 + default: 691 + break; 692 + } 693 + break; 688 694 case NVME_ID_CNS_NS_DESC_LIST: 689 - return nvmet_execute_identify_desclist(req); 695 + if (nvmet_handle_identify_desclist(req) == true) 696 + return; 697 + break; 690 698 } 691 699 692 - pr_debug("unhandled identify cns %d on qid %d\n", 693 - req->cmd->identify.cns, req->sq->qid); 694 - req->error_loc = offsetof(struct nvme_identify, cns); 695 - nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); 700 + nvmet_req_cns_error_complete(req); 696 701 } 697 702 698 703 /*
+76 -26
drivers/nvme/target/configfs.c
··· 1007 1007 NVME_MINOR(subsys->ver)); 1008 1008 } 1009 1009 1010 - static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, 1011 - const char *page, size_t count) 1010 + static ssize_t 1011 + nvmet_subsys_attr_version_store_locked(struct nvmet_subsys *subsys, 1012 + const char *page, size_t count) 1012 1013 { 1013 - struct nvmet_subsys *subsys = to_subsys(item); 1014 1014 int major, minor, tertiary = 0; 1015 1015 int ret; 1016 + 1017 + if (subsys->subsys_discovered) { 1018 + if (NVME_TERTIARY(subsys->ver)) 1019 + pr_err("Can't set version number. %llu.%llu.%llu is already assigned\n", 1020 + NVME_MAJOR(subsys->ver), 1021 + NVME_MINOR(subsys->ver), 1022 + NVME_TERTIARY(subsys->ver)); 1023 + else 1024 + pr_err("Can't set version number. %llu.%llu is already assigned\n", 1025 + NVME_MAJOR(subsys->ver), 1026 + NVME_MINOR(subsys->ver)); 1027 + return -EINVAL; 1028 + } 1016 1029 1017 1030 /* passthru subsystems use the underlying controller's version */ 1018 1031 if (nvmet_passthru_ctrl(subsys)) ··· 1035 1022 if (ret != 2 && ret != 3) 1036 1023 return -EINVAL; 1037 1024 1038 - down_write(&nvmet_config_sem); 1039 1025 subsys->ver = NVME_VS(major, minor, tertiary); 1040 - up_write(&nvmet_config_sem); 1041 1026 1042 1027 return count; 1043 1028 } 1029 + 1030 + static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, 1031 + const char *page, size_t count) 1032 + { 1033 + struct nvmet_subsys *subsys = to_subsys(item); 1034 + ssize_t ret; 1035 + 1036 + down_write(&nvmet_config_sem); 1037 + mutex_lock(&subsys->lock); 1038 + ret = nvmet_subsys_attr_version_store_locked(subsys, page, count); 1039 + mutex_unlock(&subsys->lock); 1040 + up_write(&nvmet_config_sem); 1041 + 1042 + return ret; 1043 + } 1044 1044 CONFIGFS_ATTR(nvmet_subsys_, attr_version); 1045 + 1046 + /* See Section 1.5 of NVMe 1.4 */ 1047 + static bool nvmet_is_ascii(const char c) 1048 + { 1049 + return c >= 0x20 && c <= 0x7e; 1050 + } 1045 1051 1046 1052 static ssize_t 
nvmet_subsys_attr_serial_show(struct config_item *item, 1047 1053 char *page) 1048 1054 { 1049 1055 struct nvmet_subsys *subsys = to_subsys(item); 1050 1056 1051 - return snprintf(page, PAGE_SIZE, "%llx\n", subsys->serial); 1057 + return snprintf(page, PAGE_SIZE, "%s\n", subsys->serial); 1058 + } 1059 + 1060 + static ssize_t 1061 + nvmet_subsys_attr_serial_store_locked(struct nvmet_subsys *subsys, 1062 + const char *page, size_t count) 1063 + { 1064 + int pos, len = strcspn(page, "\n"); 1065 + 1066 + if (subsys->subsys_discovered) { 1067 + pr_err("Can't set serial number. %s is already assigned\n", 1068 + subsys->serial); 1069 + return -EINVAL; 1070 + } 1071 + 1072 + if (!len || len > NVMET_SN_MAX_SIZE) { 1073 + pr_err("Serial Number can not be empty or exceed %d Bytes\n", 1074 + NVMET_SN_MAX_SIZE); 1075 + return -EINVAL; 1076 + } 1077 + 1078 + for (pos = 0; pos < len; pos++) { 1079 + if (!nvmet_is_ascii(page[pos])) { 1080 + pr_err("Serial Number must contain only ASCII strings\n"); 1081 + return -EINVAL; 1082 + } 1083 + } 1084 + 1085 + memcpy_and_pad(subsys->serial, NVMET_SN_MAX_SIZE, page, len, ' '); 1086 + 1087 + return count; 1052 1088 } 1053 1089 1054 1090 static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item, 1055 1091 const char *page, size_t count) 1056 1092 { 1057 - u64 serial; 1058 - 1059 - if (sscanf(page, "%llx\n", &serial) != 1) 1060 - return -EINVAL; 1093 + struct nvmet_subsys *subsys = to_subsys(item); 1094 + ssize_t ret; 1061 1095 1062 1096 down_write(&nvmet_config_sem); 1063 - to_subsys(item)->serial = serial; 1097 + mutex_lock(&subsys->lock); 1098 + ret = nvmet_subsys_attr_serial_store_locked(subsys, page, count); 1099 + mutex_unlock(&subsys->lock); 1064 1100 up_write(&nvmet_config_sem); 1065 1101 1066 - return count; 1102 + return ret; 1067 1103 } 1068 1104 CONFIGFS_ATTR(nvmet_subsys_, attr_serial); 1069 1105 ··· 1180 1118 char *page) 1181 1119 { 1182 1120 struct nvmet_subsys *subsys = to_subsys(item); 1183 - int ret; 1184 1121 
1185 - mutex_lock(&subsys->lock); 1186 - ret = snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number ? 1187 - subsys->model_number : NVMET_DEFAULT_CTRL_MODEL); 1188 - mutex_unlock(&subsys->lock); 1189 - 1190 - return ret; 1191 - } 1192 - 1193 - /* See Section 1.5 of NVMe 1.4 */ 1194 - static bool nvmet_is_ascii(const char c) 1195 - { 1196 - return c >= 0x20 && c <= 0x7e; 1122 + return snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number); 1197 1123 } 1198 1124 1199 1125 static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, ··· 1189 1139 { 1190 1140 int pos = 0, len; 1191 1141 1192 - if (subsys->model_number) { 1142 + if (subsys->subsys_discovered) { 1193 1143 pr_err("Can't set model number. %s is already assigned\n", 1194 1144 subsys->model_number); 1195 1145 return -EINVAL;
+67 -33
drivers/nvme/target/core.c
··· 16 16 #include "nvmet.h" 17 17 18 18 struct workqueue_struct *buffered_io_wq; 19 + struct workqueue_struct *zbd_wq; 19 20 static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; 20 21 static DEFINE_IDA(cntlid_ida); 21 22 ··· 44 43 45 44 inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno) 46 45 { 47 - u16 status; 48 - 49 46 switch (errno) { 50 47 case 0: 51 - status = NVME_SC_SUCCESS; 52 - break; 48 + return NVME_SC_SUCCESS; 53 49 case -ENOSPC: 54 50 req->error_loc = offsetof(struct nvme_rw_command, length); 55 - status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR; 56 - break; 51 + return NVME_SC_CAP_EXCEEDED | NVME_SC_DNR; 57 52 case -EREMOTEIO: 58 53 req->error_loc = offsetof(struct nvme_rw_command, slba); 59 - status = NVME_SC_LBA_RANGE | NVME_SC_DNR; 60 - break; 54 + return NVME_SC_LBA_RANGE | NVME_SC_DNR; 61 55 case -EOPNOTSUPP: 62 56 req->error_loc = offsetof(struct nvme_common_command, opcode); 63 57 switch (req->cmd->common.opcode) { 64 58 case nvme_cmd_dsm: 65 59 case nvme_cmd_write_zeroes: 66 - status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR; 67 - break; 60 + return NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR; 68 61 default: 69 - status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR; 62 + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; 70 63 } 71 64 break; 72 65 case -ENODATA: 73 66 req->error_loc = offsetof(struct nvme_rw_command, nsid); 74 - status = NVME_SC_ACCESS_DENIED; 75 - break; 67 + return NVME_SC_ACCESS_DENIED; 76 68 case -EIO: 77 69 fallthrough; 78 70 default: 79 71 req->error_loc = offsetof(struct nvme_common_command, opcode); 80 - status = NVME_SC_INTERNAL | NVME_SC_DNR; 72 + return NVME_SC_INTERNAL | NVME_SC_DNR; 81 73 } 82 - 83 - return status; 84 74 } 85 75 86 76 u16 nvmet_report_invalid_opcode(struct nvmet_req *req) ··· 114 122 return 0; 115 123 } 116 124 117 - static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys) 125 + static u32 nvmet_max_nsid(struct nvmet_subsys *subsys) 118 126 { 119 - unsigned long nsid = 0; 
120 127 struct nvmet_ns *cur; 121 128 unsigned long idx; 129 + u32 nsid = 0; 122 130 123 131 xa_for_each(&subsys->namespaces, idx, cur) 124 132 nsid = cur->nsid; ··· 133 141 134 142 static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl) 135 143 { 136 - u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; 137 144 struct nvmet_req *req; 138 145 139 146 mutex_lock(&ctrl->lock); 140 147 while (ctrl->nr_async_event_cmds) { 141 148 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 142 149 mutex_unlock(&ctrl->lock); 143 - nvmet_req_complete(req, status); 150 + nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); 144 151 mutex_lock(&ctrl->lock); 145 152 } 146 153 mutex_unlock(&ctrl->lock); ··· 403 412 pr_debug("ctrl %d start keep-alive timer for %d secs\n", 404 413 ctrl->cntlid, ctrl->kato); 405 414 406 - INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); 407 415 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); 408 416 } 409 417 ··· 683 693 684 694 uuid_gen(&ns->uuid); 685 695 ns->buffered_io = false; 696 + ns->csi = NVME_CSI_NVM; 686 697 687 698 return ns; 688 699 } ··· 886 895 return ret; 887 896 } 888 897 889 - if (req->ns->file) 890 - return nvmet_file_parse_io_cmd(req); 891 - 892 - return nvmet_bdev_parse_io_cmd(req); 898 + switch (req->ns->csi) { 899 + case NVME_CSI_NVM: 900 + if (req->ns->file) 901 + return nvmet_file_parse_io_cmd(req); 902 + return nvmet_bdev_parse_io_cmd(req); 903 + case NVME_CSI_ZNS: 904 + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) 905 + return nvmet_bdev_zns_parse_io_cmd(req); 906 + return NVME_SC_INVALID_IO_CMD_SET; 907 + default: 908 + return NVME_SC_INVALID_IO_CMD_SET; 909 + } 893 910 } 894 911 895 912 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, ··· 1118 1119 return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf; 1119 1120 } 1120 1121 1122 + static inline bool nvmet_css_supported(u8 cc_css) 1123 + { 1124 + switch (cc_css <<= NVME_CC_CSS_SHIFT) { 1125 + case NVME_CC_CSS_NVM: 1126 + case NVME_CC_CSS_CSI: 1127 + 
return true; 1128 + default: 1129 + return false; 1130 + } 1131 + } 1132 + 1121 1133 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) 1122 1134 { 1123 1135 lockdep_assert_held(&ctrl->lock); ··· 1148 1138 1149 1139 if (nvmet_cc_mps(ctrl->cc) != 0 || 1150 1140 nvmet_cc_ams(ctrl->cc) != 0 || 1151 - nvmet_cc_css(ctrl->cc) != 0) { 1141 + !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) { 1152 1142 ctrl->csts = NVME_CSTS_CFS; 1153 1143 return; 1154 1144 } ··· 1199 1189 { 1200 1190 /* command sets supported: NVMe command set: */ 1201 1191 ctrl->cap = (1ULL << 37); 1192 + /* Controller supports one or more I/O Command Sets */ 1193 + ctrl->cap |= (1ULL << 43); 1202 1194 /* CC.EN timeout in 500msec units: */ 1203 1195 ctrl->cap |= (15ULL << 24); 1204 1196 /* maximum queue entries supported: */ ··· 1370 1358 INIT_LIST_HEAD(&ctrl->async_events); 1371 1359 INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL); 1372 1360 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); 1361 + INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); 1373 1362 1374 1363 memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); 1375 1364 memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); ··· 1512 1499 enum nvme_subsys_type type) 1513 1500 { 1514 1501 struct nvmet_subsys *subsys; 1502 + char serial[NVMET_SN_MAX_SIZE / 2]; 1503 + int ret; 1515 1504 1516 1505 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); 1517 1506 if (!subsys) ··· 1521 1506 1522 1507 subsys->ver = NVMET_DEFAULT_VS; 1523 1508 /* generate a random serial number as our controllers are ephemeral: */ 1524 - get_random_bytes(&subsys->serial, sizeof(subsys->serial)); 1509 + get_random_bytes(&serial, sizeof(serial)); 1510 + bin2hex(subsys->serial, &serial, sizeof(serial)); 1511 + 1512 + subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL); 1513 + if (!subsys->model_number) { 1514 + ret = -ENOMEM; 1515 + goto free_subsys; 1516 + } 1525 1517 1526 1518 switch (type) { 1527 1519 case NVME_NQN_NVME: ··· 1539 1517 break; 1540 1518 
default: 1541 1519 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); 1542 - kfree(subsys); 1543 - return ERR_PTR(-EINVAL); 1520 + ret = -EINVAL; 1521 + goto free_mn; 1544 1522 } 1545 1523 subsys->type = type; 1546 1524 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, 1547 1525 GFP_KERNEL); 1548 1526 if (!subsys->subsysnqn) { 1549 - kfree(subsys); 1550 - return ERR_PTR(-ENOMEM); 1527 + ret = -ENOMEM; 1528 + goto free_mn; 1551 1529 } 1552 1530 subsys->cntlid_min = NVME_CNTLID_MIN; 1553 1531 subsys->cntlid_max = NVME_CNTLID_MAX; ··· 1559 1537 INIT_LIST_HEAD(&subsys->hosts); 1560 1538 1561 1539 return subsys; 1540 + 1541 + free_mn: 1542 + kfree(subsys->model_number); 1543 + free_subsys: 1544 + kfree(subsys); 1545 + return ERR_PTR(ret); 1562 1546 } 1563 1547 1564 1548 static void nvmet_subsys_free(struct kref *ref) ··· 1603 1575 1604 1576 nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; 1605 1577 1578 + zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0); 1579 + if (!zbd_wq) 1580 + return -ENOMEM; 1581 + 1606 1582 buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", 1607 1583 WQ_MEM_RECLAIM, 0); 1608 1584 if (!buffered_io_wq) { 1609 1585 error = -ENOMEM; 1610 - goto out; 1586 + goto out_free_zbd_work_queue; 1611 1587 } 1612 1588 1613 1589 error = nvmet_init_discovery(); ··· 1627 1595 nvmet_exit_discovery(); 1628 1596 out_free_work_queue: 1629 1597 destroy_workqueue(buffered_io_wq); 1630 - out: 1598 + out_free_zbd_work_queue: 1599 + destroy_workqueue(zbd_wq); 1631 1600 return error; 1632 1601 } 1633 1602 ··· 1638 1605 nvmet_exit_discovery(); 1639 1606 ida_destroy(&cntlid_ida); 1640 1607 destroy_workqueue(buffered_io_wq); 1608 + destroy_workqueue(zbd_wq); 1641 1609 1642 1610 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); 1643 1611 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
+3 -5
drivers/nvme/target/discovery.c
··· 244 244 { 245 245 struct nvmet_ctrl *ctrl = req->sq->ctrl; 246 246 struct nvme_id_ctrl *id; 247 - const char model[] = "Linux"; 248 247 u16 status = 0; 249 248 250 249 if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) ··· 261 262 goto out; 262 263 } 263 264 264 - memset(id->sn, ' ', sizeof(id->sn)); 265 - bin2hex(id->sn, &ctrl->subsys->serial, 266 - min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); 265 + memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE); 267 266 memset(id->fr, ' ', sizeof(id->fr)); 268 - memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); 267 + memcpy_and_pad(id->mn, sizeof(id->mn), ctrl->subsys->model_number, 268 + strlen(ctrl->subsys->model_number), ' '); 269 269 memcpy_and_pad(id->fr, sizeof(id->fr), 270 270 UTS_RELEASE, strlen(UTS_RELEASE), ' '); 271 271
+2 -8
drivers/nvme/target/fc.c
··· 2511 2511 int ret; 2512 2512 2513 2513 /* 2514 - * if there is no nvmet mapping to the targetport there 2515 - * shouldn't be requests. just terminate them. 2516 - */ 2517 - if (!tgtport->pe) 2518 - goto transport_error; 2519 - 2520 - /* 2521 2514 * Fused commands are currently not supported in the linux 2522 2515 * implementation. 2523 2516 * ··· 2537 2544 2538 2545 fod->req.cmd = &fod->cmdiubuf.sqe; 2539 2546 fod->req.cqe = &fod->rspiubuf.cqe; 2540 - fod->req.port = tgtport->pe->port; 2547 + if (tgtport->pe) 2548 + fod->req.port = tgtport->pe->port; 2541 2549 2542 2550 /* clear any response payload */ 2543 2551 memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
+20 -16
drivers/nvme/target/io-cmd-bdev.c
··· 47 47 id->nows = to0based(ql->io_opt / ql->logical_block_size); 48 48 } 49 49 50 + void nvmet_bdev_ns_disable(struct nvmet_ns *ns) 51 + { 52 + if (ns->bdev) { 53 + blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); 54 + ns->bdev = NULL; 55 + } 56 + } 57 + 50 58 static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) 51 59 { 52 60 struct blk_integrity *bi = bdev_get_integrity(ns->bdev); ··· 94 86 if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10)) 95 87 nvmet_bdev_ns_enable_integrity(ns); 96 88 97 - return 0; 98 - } 99 - 100 - void nvmet_bdev_ns_disable(struct nvmet_ns *ns) 101 - { 102 - if (ns->bdev) { 103 - blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); 104 - ns->bdev = NULL; 89 + if (bdev_is_zoned(ns->bdev)) { 90 + if (!nvmet_bdev_zns_enable(ns)) { 91 + nvmet_bdev_ns_disable(ns); 92 + return -EINVAL; 93 + } 94 + ns->csi = NVME_CSI_ZNS; 105 95 } 96 + 97 + return 0; 106 98 } 107 99 108 100 void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns) ··· 110 102 ns->size = i_size_read(ns->bdev->bd_inode); 111 103 } 112 104 113 - static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) 105 + u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) 114 106 { 115 107 u16 status = NVME_SC_SUCCESS; 116 108 ··· 172 164 struct nvmet_req *req = bio->bi_private; 173 165 174 166 nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status)); 175 - if (bio != &req->b.inline_bio) 176 - bio_put(bio); 167 + nvmet_req_bio_put(req, bio); 177 168 } 178 169 179 170 #ifdef CONFIG_BLK_DEV_INTEGRITY ··· 181 174 { 182 175 struct blk_integrity *bi; 183 176 struct bio_integrity_payload *bip; 184 - struct block_device *bdev = req->ns->bdev; 185 177 int rc; 186 178 size_t resid, len; 187 179 188 - bi = bdev_get_integrity(bdev); 180 + bi = bdev_get_integrity(req->ns->bdev); 189 181 if (unlikely(!bi)) { 190 182 pr_err("Unable to locate bio_integrity\n"); 191 183 return -ENODEV; ··· 436 430 437 431 u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) 438 432 { 439 - 
struct nvme_command *cmd = req->cmd; 440 - 441 - switch (cmd->common.opcode) { 433 + switch (req->cmd->common.opcode) { 442 434 case nvme_cmd_read: 443 435 case nvme_cmd_write: 444 436 req->execute = nvmet_bdev_execute_rw;
+1 -3
drivers/nvme/target/io-cmd-file.c
··· 385 385 386 386 u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) 387 387 { 388 - struct nvme_command *cmd = req->cmd; 389 - 390 - switch (cmd->common.opcode) { 388 + switch (req->cmd->common.opcode) { 391 389 case nvme_cmd_read: 392 390 case nvme_cmd_write: 393 391 req->execute = nvmet_file_execute_rw;
+39 -2
drivers/nvme/target/nvmet.h
··· 28 28 #define NVMET_NO_ERROR_LOC ((u16)-1) 29 29 #define NVMET_DEFAULT_CTRL_MODEL "Linux" 30 30 #define NVMET_MN_MAX_SIZE 40 31 + #define NVMET_SN_MAX_SIZE 20 31 32 32 33 /* 33 34 * Supported optional AENs: ··· 83 82 struct pci_dev *p2p_dev; 84 83 int pi_type; 85 84 int metadata_size; 85 + u8 csi; 86 86 }; 87 87 88 88 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) ··· 219 217 220 218 struct xarray namespaces; 221 219 unsigned int nr_namespaces; 222 - unsigned int max_nsid; 220 + u32 max_nsid; 223 221 u16 cntlid_min; 224 222 u16 cntlid_max; 225 223 ··· 231 229 u16 max_qid; 232 230 233 231 u64 ver; 234 - u64 serial; 232 + char serial[NVMET_SN_MAX_SIZE]; 233 + bool subsys_discovered; 235 234 char *subsysnqn; 236 235 bool pi_support; 237 236 ··· 250 247 unsigned int admin_timeout; 251 248 unsigned int io_timeout; 252 249 #endif /* CONFIG_NVME_TARGET_PASSTHRU */ 250 + 251 + #ifdef CONFIG_BLK_DEV_ZONED 252 + u8 zasl; 253 + #endif /* CONFIG_BLK_DEV_ZONED */ 253 254 }; 254 255 255 256 static inline struct nvmet_subsys *to_subsys(struct config_item *item) ··· 339 332 struct work_struct work; 340 333 bool use_workqueue; 341 334 } p; 335 + #ifdef CONFIG_BLK_DEV_ZONED 336 + struct { 337 + struct bio inline_bio; 338 + struct work_struct zmgmt_work; 339 + } z; 340 + #endif /* CONFIG_BLK_DEV_ZONED */ 342 341 }; 343 342 int sg_cnt; 344 343 int metadata_sg_cnt; ··· 364 351 }; 365 352 366 353 extern struct workqueue_struct *buffered_io_wq; 354 + extern struct workqueue_struct *zbd_wq; 367 355 368 356 static inline void nvmet_set_result(struct nvmet_req *req, u32 result) 369 357 { ··· 414 400 void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id); 415 401 u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); 416 402 u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); 403 + u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req); 417 404 u16 nvmet_parse_admin_cmd(struct nvmet_req *req); 418 405 u16 nvmet_parse_discovery_cmd(struct nvmet_req 
*req); 419 406 u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); ··· 542 527 void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); 543 528 int nvmet_file_ns_revalidate(struct nvmet_ns *ns); 544 529 void nvmet_ns_revalidate(struct nvmet_ns *ns); 530 + u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts); 531 + 532 + bool nvmet_bdev_zns_enable(struct nvmet_ns *ns); 533 + void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req); 534 + void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req); 535 + void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req); 536 + void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req); 537 + void nvmet_bdev_execute_zone_append(struct nvmet_req *req); 545 538 546 539 static inline u32 nvmet_rw_data_len(struct nvmet_req *req) 547 540 { ··· 643 620 { 644 621 return req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN && 645 622 req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC; 623 + } 624 + 625 + static inline void nvmet_req_cns_error_complete(struct nvmet_req *req) 626 + { 627 + pr_debug("unhandled identify cns %d on qid %d\n", 628 + req->cmd->identify.cns, req->sq->qid); 629 + req->error_loc = offsetof(struct nvme_identify, cns); 630 + nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); 631 + } 632 + 633 + static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio) 634 + { 635 + if (bio != &req->b.inline_bio) 636 + bio_put(bio); 646 637 } 647 638 648 639 #endif /* _NVMET_H */
+1 -2
drivers/nvme/target/passthru.c
··· 206 206 for_each_sg(req->sg, sg, req->sg_cnt, i) { 207 207 if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length, 208 208 sg->offset) < sg->length) { 209 - if (bio != &req->p.inline_bio) 210 - bio_put(bio); 209 + nvmet_req_bio_put(req, bio); 211 210 return -EINVAL; 212 211 } 213 212 }
+1 -2
drivers/nvme/target/rdma.c
··· 1257 1257 1258 1258 static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) 1259 1259 { 1260 - struct ib_qp_init_attr qp_attr; 1260 + struct ib_qp_init_attr qp_attr = { }; 1261 1261 struct nvmet_rdma_device *ndev = queue->dev; 1262 1262 int nr_cqe, ret, i, factor; 1263 1263 ··· 1275 1275 goto out; 1276 1276 } 1277 1277 1278 - memset(&qp_attr, 0, sizeof(qp_attr)); 1279 1278 qp_attr.qp_context = queue; 1280 1279 qp_attr.event_handler = nvmet_rdma_qp_event; 1281 1280 qp_attr.send_cq = queue->cq;
+615
drivers/nvme/target/zns.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * NVMe ZNS-ZBD command implementation. 4 + * Copyright (C) 2021 Western Digital Corporation or its affiliates. 5 + */ 6 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 + #include <linux/nvme.h> 8 + #include <linux/blkdev.h> 9 + #include "nvmet.h" 10 + 11 + /* 12 + * We set the Memory Page Size Minimum (MPSMIN) for target controller to 0 13 + * which gets added by 12 in the nvme_enable_ctrl() which results in 2^12 = 4k 14 + * as page_shift value. When calculating the ZASL use shift by 12. 15 + */ 16 + #define NVMET_MPSMIN_SHIFT 12 17 + 18 + static inline u8 nvmet_zasl(unsigned int zone_append_sects) 19 + { 20 + /* 21 + * Zone Append Size Limit (zasl) is expressed as a power of 2 value 22 + * with the minimum memory page size (i.e. 12) as unit. 23 + */ 24 + return ilog2(zone_append_sects >> (NVMET_MPSMIN_SHIFT - 9)); 25 + } 26 + 27 + static int validate_conv_zones_cb(struct blk_zone *z, 28 + unsigned int i, void *data) 29 + { 30 + if (z->type == BLK_ZONE_TYPE_CONVENTIONAL) 31 + return -EOPNOTSUPP; 32 + return 0; 33 + } 34 + 35 + bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) 36 + { 37 + struct request_queue *q = ns->bdev->bd_disk->queue; 38 + u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q)); 39 + struct gendisk *bd_disk = ns->bdev->bd_disk; 40 + int ret; 41 + 42 + if (ns->subsys->zasl) { 43 + if (ns->subsys->zasl > zasl) 44 + return false; 45 + } 46 + ns->subsys->zasl = zasl; 47 + 48 + /* 49 + * Generic zoned block devices may have a smaller last zone which is 50 + * not supported by ZNS. Exclude zoned drives that have such smaller 51 + * last zone. 52 + */ 53 + if (get_capacity(bd_disk) & (bdev_zone_sectors(ns->bdev) - 1)) 54 + return false; 55 + /* 56 + * ZNS does not define a conventional zone type. If the underlying 57 + * device has a bitmap set indicating the existence of conventional 58 + * zones, reject the device. Otherwise, use report zones to detect if 59 + * the device has conventional zones. 
60 + */ 61 + if (ns->bdev->bd_disk->queue->conv_zones_bitmap) 62 + return false; 63 + 64 + ret = blkdev_report_zones(ns->bdev, 0, blkdev_nr_zones(bd_disk), 65 + validate_conv_zones_cb, NULL); 66 + if (ret < 0) 67 + return false; 68 + 69 + ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); 70 + 71 + return true; 72 + } 73 + 74 + void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req) 75 + { 76 + u8 zasl = req->sq->ctrl->subsys->zasl; 77 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 78 + struct nvme_id_ctrl_zns *id; 79 + u16 status; 80 + 81 + id = kzalloc(sizeof(*id), GFP_KERNEL); 82 + if (!id) { 83 + status = NVME_SC_INTERNAL; 84 + goto out; 85 + } 86 + 87 + if (ctrl->ops->get_mdts) 88 + id->zasl = min_t(u8, ctrl->ops->get_mdts(ctrl), zasl); 89 + else 90 + id->zasl = zasl; 91 + 92 + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); 93 + 94 + kfree(id); 95 + out: 96 + nvmet_req_complete(req, status); 97 + } 98 + 99 + void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) 100 + { 101 + struct nvme_id_ns_zns *id_zns; 102 + u64 zsze; 103 + u16 status; 104 + 105 + if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) { 106 + req->error_loc = offsetof(struct nvme_identify, nsid); 107 + status = NVME_SC_INVALID_NS | NVME_SC_DNR; 108 + goto out; 109 + } 110 + 111 + id_zns = kzalloc(sizeof(*id_zns), GFP_KERNEL); 112 + if (!id_zns) { 113 + status = NVME_SC_INTERNAL; 114 + goto out; 115 + } 116 + 117 + status = nvmet_req_find_ns(req); 118 + if (status) { 119 + status = NVME_SC_INTERNAL; 120 + goto done; 121 + } 122 + 123 + if (!bdev_is_zoned(req->ns->bdev)) { 124 + req->error_loc = offsetof(struct nvme_identify, nsid); 125 + status = NVME_SC_INVALID_NS | NVME_SC_DNR; 126 + goto done; 127 + } 128 + 129 + nvmet_ns_revalidate(req->ns); 130 + zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >> 131 + req->ns->blksize_shift; 132 + id_zns->lbafe[0].zsze = cpu_to_le64(zsze); 133 + id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev)); 134 + 
id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev)); 135 + 136 + done: 137 + status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns)); 138 + kfree(id_zns); 139 + out: 140 + nvmet_req_complete(req, status); 141 + } 142 + 143 + static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req) 144 + { 145 + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba); 146 + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2; 147 + 148 + if (sect >= get_capacity(req->ns->bdev->bd_disk)) { 149 + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, slba); 150 + return NVME_SC_LBA_RANGE | NVME_SC_DNR; 151 + } 152 + 153 + if (out_bufsize < sizeof(struct nvme_zone_report)) { 154 + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, numd); 155 + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; 156 + } 157 + 158 + if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) { 159 + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, zra); 160 + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; 161 + } 162 + 163 + switch (req->cmd->zmr.pr) { 164 + case 0: 165 + case 1: 166 + break; 167 + default: 168 + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, pr); 169 + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; 170 + } 171 + 172 + switch (req->cmd->zmr.zrasf) { 173 + case NVME_ZRASF_ZONE_REPORT_ALL: 174 + case NVME_ZRASF_ZONE_STATE_EMPTY: 175 + case NVME_ZRASF_ZONE_STATE_IMP_OPEN: 176 + case NVME_ZRASF_ZONE_STATE_EXP_OPEN: 177 + case NVME_ZRASF_ZONE_STATE_CLOSED: 178 + case NVME_ZRASF_ZONE_STATE_FULL: 179 + case NVME_ZRASF_ZONE_STATE_READONLY: 180 + case NVME_ZRASF_ZONE_STATE_OFFLINE: 181 + break; 182 + default: 183 + req->error_loc = 184 + offsetof(struct nvme_zone_mgmt_recv_cmd, zrasf); 185 + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; 186 + } 187 + 188 + return NVME_SC_SUCCESS; 189 + } 190 + 191 + struct nvmet_report_zone_data { 192 + struct nvmet_req *req; 193 + u64 out_buf_offset; 194 + u64 out_nr_zones; 195 + u64 nr_zones; 196 + u8 zrasf; 197 + }; 
198 + 199 + static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d) 200 + { 201 + static const unsigned int nvme_zrasf_to_blk_zcond[] = { 202 + [NVME_ZRASF_ZONE_STATE_EMPTY] = BLK_ZONE_COND_EMPTY, 203 + [NVME_ZRASF_ZONE_STATE_IMP_OPEN] = BLK_ZONE_COND_IMP_OPEN, 204 + [NVME_ZRASF_ZONE_STATE_EXP_OPEN] = BLK_ZONE_COND_EXP_OPEN, 205 + [NVME_ZRASF_ZONE_STATE_CLOSED] = BLK_ZONE_COND_CLOSED, 206 + [NVME_ZRASF_ZONE_STATE_READONLY] = BLK_ZONE_COND_READONLY, 207 + [NVME_ZRASF_ZONE_STATE_FULL] = BLK_ZONE_COND_FULL, 208 + [NVME_ZRASF_ZONE_STATE_OFFLINE] = BLK_ZONE_COND_OFFLINE, 209 + }; 210 + struct nvmet_report_zone_data *rz = d; 211 + 212 + if (rz->zrasf != NVME_ZRASF_ZONE_REPORT_ALL && 213 + z->cond != nvme_zrasf_to_blk_zcond[rz->zrasf]) 214 + return 0; 215 + 216 + if (rz->nr_zones < rz->out_nr_zones) { 217 + struct nvme_zone_descriptor zdesc = { }; 218 + u16 status; 219 + 220 + zdesc.zcap = nvmet_sect_to_lba(rz->req->ns, z->capacity); 221 + zdesc.zslba = nvmet_sect_to_lba(rz->req->ns, z->start); 222 + zdesc.wp = nvmet_sect_to_lba(rz->req->ns, z->wp); 223 + zdesc.za = z->reset ? 
1 << 2 : 0; 224 + zdesc.zs = z->cond << 4; 225 + zdesc.zt = z->type; 226 + 227 + status = nvmet_copy_to_sgl(rz->req, rz->out_buf_offset, &zdesc, 228 + sizeof(zdesc)); 229 + if (status) 230 + return -EINVAL; 231 + 232 + rz->out_buf_offset += sizeof(zdesc); 233 + } 234 + 235 + rz->nr_zones++; 236 + 237 + return 0; 238 + } 239 + 240 + static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req) 241 + { 242 + unsigned int sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba); 243 + 244 + return blkdev_nr_zones(req->ns->bdev->bd_disk) - 245 + (sect >> ilog2(bdev_zone_sectors(req->ns->bdev))); 246 + } 247 + 248 + static unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 bufsize) 249 + { 250 + if (bufsize <= sizeof(struct nvme_zone_report)) 251 + return 0; 252 + 253 + return (bufsize - sizeof(struct nvme_zone_report)) / 254 + sizeof(struct nvme_zone_descriptor); 255 + } 256 + 257 + static void nvmet_bdev_zone_zmgmt_recv_work(struct work_struct *w) 258 + { 259 + struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work); 260 + sector_t start_sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba); 261 + unsigned long req_slba_nr_zones = nvmet_req_nr_zones_from_slba(req); 262 + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2; 263 + __le64 nr_zones; 264 + u16 status; 265 + int ret; 266 + struct nvmet_report_zone_data rz_data = { 267 + .out_nr_zones = get_nr_zones_from_buf(req, out_bufsize), 268 + /* leave the place for report zone header */ 269 + .out_buf_offset = sizeof(struct nvme_zone_report), 270 + .zrasf = req->cmd->zmr.zrasf, 271 + .nr_zones = 0, 272 + .req = req, 273 + }; 274 + 275 + status = nvmet_bdev_validate_zone_mgmt_recv(req); 276 + if (status) 277 + goto out; 278 + 279 + if (!req_slba_nr_zones) { 280 + status = NVME_SC_SUCCESS; 281 + goto out; 282 + } 283 + 284 + ret = blkdev_report_zones(req->ns->bdev, start_sect, req_slba_nr_zones, 285 + nvmet_bdev_report_zone_cb, &rz_data); 286 + if (ret < 0) { 287 + status = 
NVME_SC_INTERNAL; 288 + goto out; 289 + } 290 + 291 + /* 292 + * When partial bit is set nr_zones must indicate the number of zone 293 + * descriptors actually transferred. 294 + */ 295 + if (req->cmd->zmr.pr) 296 + rz_data.nr_zones = min(rz_data.nr_zones, rz_data.out_nr_zones); 297 + 298 + nr_zones = cpu_to_le64(rz_data.nr_zones); 299 + status = nvmet_copy_to_sgl(req, 0, &nr_zones, sizeof(nr_zones)); 300 + 301 + out: 302 + nvmet_req_complete(req, status); 303 + } 304 + 305 + void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req) 306 + { 307 + INIT_WORK(&req->z.zmgmt_work, nvmet_bdev_zone_zmgmt_recv_work); 308 + queue_work(zbd_wq, &req->z.zmgmt_work); 309 + } 310 + 311 + static inline enum req_opf zsa_req_op(u8 zsa) 312 + { 313 + switch (zsa) { 314 + case NVME_ZONE_OPEN: 315 + return REQ_OP_ZONE_OPEN; 316 + case NVME_ZONE_CLOSE: 317 + return REQ_OP_ZONE_CLOSE; 318 + case NVME_ZONE_FINISH: 319 + return REQ_OP_ZONE_FINISH; 320 + case NVME_ZONE_RESET: 321 + return REQ_OP_ZONE_RESET; 322 + default: 323 + return REQ_OP_LAST; 324 + } 325 + } 326 + 327 + static u16 blkdev_zone_mgmt_errno_to_nvme_status(int ret) 328 + { 329 + switch (ret) { 330 + case 0: 331 + return NVME_SC_SUCCESS; 332 + case -EINVAL: 333 + case -EIO: 334 + return NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR; 335 + default: 336 + return NVME_SC_INTERNAL; 337 + } 338 + } 339 + 340 + struct nvmet_zone_mgmt_send_all_data { 341 + unsigned long *zbitmap; 342 + struct nvmet_req *req; 343 + }; 344 + 345 + static int zmgmt_send_scan_cb(struct blk_zone *z, unsigned i, void *d) 346 + { 347 + struct nvmet_zone_mgmt_send_all_data *data = d; 348 + 349 + switch (zsa_req_op(data->req->cmd->zms.zsa)) { 350 + case REQ_OP_ZONE_OPEN: 351 + switch (z->cond) { 352 + case BLK_ZONE_COND_CLOSED: 353 + break; 354 + default: 355 + return 0; 356 + } 357 + break; 358 + case REQ_OP_ZONE_CLOSE: 359 + switch (z->cond) { 360 + case BLK_ZONE_COND_IMP_OPEN: 361 + case BLK_ZONE_COND_EXP_OPEN: 362 + break; 363 + default: 364 + 
return 0; 365 + } 366 + break; 367 + case REQ_OP_ZONE_FINISH: 368 + switch (z->cond) { 369 + case BLK_ZONE_COND_IMP_OPEN: 370 + case BLK_ZONE_COND_EXP_OPEN: 371 + case BLK_ZONE_COND_CLOSED: 372 + break; 373 + default: 374 + return 0; 375 + } 376 + break; 377 + default: 378 + return -EINVAL; 379 + } 380 + 381 + set_bit(i, data->zbitmap); 382 + 383 + return 0; 384 + } 385 + 386 + static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) 387 + { 388 + struct block_device *bdev = req->ns->bdev; 389 + unsigned int nr_zones = blkdev_nr_zones(bdev->bd_disk); 390 + struct request_queue *q = bdev_get_queue(bdev); 391 + struct bio *bio = NULL; 392 + sector_t sector = 0; 393 + int ret; 394 + struct nvmet_zone_mgmt_send_all_data d = { 395 + .req = req, 396 + }; 397 + 398 + d.zbitmap = kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(*(d.zbitmap)), 399 + GFP_NOIO, q->node); 400 + if (!d.zbitmap) { 401 + ret = -ENOMEM; 402 + goto out; 403 + } 404 + 405 + /* Scan and build bitmap of the eligible zones */ 406 + ret = blkdev_report_zones(bdev, 0, nr_zones, zmgmt_send_scan_cb, &d); 407 + if (ret != nr_zones) { 408 + if (ret > 0) 409 + ret = -EIO; 410 + goto out; 411 + } else { 412 + /* We scanned all the zones */ 413 + ret = 0; 414 + } 415 + 416 + while (sector < get_capacity(bdev->bd_disk)) { 417 + if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) { 418 + bio = blk_next_bio(bio, 0, GFP_KERNEL); 419 + bio->bi_opf = zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC; 420 + bio->bi_iter.bi_sector = sector; 421 + bio_set_dev(bio, bdev); 422 + /* This may take a while, so be nice to others */ 423 + cond_resched(); 424 + } 425 + sector += blk_queue_zone_sectors(q); 426 + } 427 + 428 + if (bio) { 429 + ret = submit_bio_wait(bio); 430 + bio_put(bio); 431 + } 432 + 433 + out: 434 + kfree(d.zbitmap); 435 + 436 + return blkdev_zone_mgmt_errno_to_nvme_status(ret); 437 + } 438 + 439 + static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req) 440 + { 441 + int ret; 442 + 443 + switch 
(zsa_req_op(req->cmd->zms.zsa)) { 444 + case REQ_OP_ZONE_RESET: 445 + ret = blkdev_zone_mgmt(req->ns->bdev, REQ_OP_ZONE_RESET, 0, 446 + get_capacity(req->ns->bdev->bd_disk), 447 + GFP_KERNEL); 448 + if (ret < 0) 449 + return blkdev_zone_mgmt_errno_to_nvme_status(ret); 450 + break; 451 + case REQ_OP_ZONE_OPEN: 452 + case REQ_OP_ZONE_CLOSE: 453 + case REQ_OP_ZONE_FINISH: 454 + return nvmet_bdev_zone_mgmt_emulate_all(req); 455 + default: 456 + /* this is needed to quiet compiler warning */ 457 + req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa); 458 + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; 459 + } 460 + 461 + return NVME_SC_SUCCESS; 462 + } 463 + 464 + static void nvmet_bdev_zmgmt_send_work(struct work_struct *w) 465 + { 466 + struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work); 467 + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba); 468 + enum req_opf op = zsa_req_op(req->cmd->zms.zsa); 469 + struct block_device *bdev = req->ns->bdev; 470 + sector_t zone_sectors = bdev_zone_sectors(bdev); 471 + u16 status = NVME_SC_SUCCESS; 472 + int ret; 473 + 474 + if (op == REQ_OP_LAST) { 475 + req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa); 476 + status = NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR; 477 + goto out; 478 + } 479 + 480 + /* when select all bit is set slba field is ignored */ 481 + if (req->cmd->zms.select_all) { 482 + status = nvmet_bdev_execute_zmgmt_send_all(req); 483 + goto out; 484 + } 485 + 486 + if (sect >= get_capacity(bdev->bd_disk)) { 487 + req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba); 488 + status = NVME_SC_LBA_RANGE | NVME_SC_DNR; 489 + goto out; 490 + } 491 + 492 + if (sect & (zone_sectors - 1)) { 493 + req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba); 494 + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; 495 + goto out; 496 + } 497 + 498 + ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors, GFP_KERNEL); 499 + if (ret < 0) 500 + status = 
blkdev_zone_mgmt_errno_to_nvme_status(ret); 501 + 502 + out: 503 + nvmet_req_complete(req, status); 504 + } 505 + 506 + void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req) 507 + { 508 + INIT_WORK(&req->z.zmgmt_work, nvmet_bdev_zmgmt_send_work); 509 + queue_work(zbd_wq, &req->z.zmgmt_work); 510 + } 511 + 512 + static void nvmet_bdev_zone_append_bio_done(struct bio *bio) 513 + { 514 + struct nvmet_req *req = bio->bi_private; 515 + 516 + if (bio->bi_status == BLK_STS_OK) { 517 + req->cqe->result.u64 = 518 + nvmet_sect_to_lba(req->ns, bio->bi_iter.bi_sector); 519 + } 520 + 521 + nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status)); 522 + nvmet_req_bio_put(req, bio); 523 + } 524 + 525 + void nvmet_bdev_execute_zone_append(struct nvmet_req *req) 526 + { 527 + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); 528 + u16 status = NVME_SC_SUCCESS; 529 + unsigned int total_len = 0; 530 + struct scatterlist *sg; 531 + struct bio *bio; 532 + int sg_cnt; 533 + 534 + /* Request is completed on len mismatch in nvmet_check_transter_len() */ 535 + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) 536 + return; 537 + 538 + if (!req->sg_cnt) { 539 + nvmet_req_complete(req, 0); 540 + return; 541 + } 542 + 543 + if (sect >= get_capacity(req->ns->bdev->bd_disk)) { 544 + req->error_loc = offsetof(struct nvme_rw_command, slba); 545 + status = NVME_SC_LBA_RANGE | NVME_SC_DNR; 546 + goto out; 547 + } 548 + 549 + if (sect & (bdev_zone_sectors(req->ns->bdev) - 1)) { 550 + req->error_loc = offsetof(struct nvme_rw_command, slba); 551 + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; 552 + goto out; 553 + } 554 + 555 + if (nvmet_use_inline_bvec(req)) { 556 + bio = &req->z.inline_bio; 557 + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); 558 + } else { 559 + bio = bio_alloc(GFP_KERNEL, req->sg_cnt); 560 + } 561 + 562 + bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE; 563 + bio->bi_end_io = nvmet_bdev_zone_append_bio_done; 564 + 
bio_set_dev(bio, req->ns->bdev); 565 + bio->bi_iter.bi_sector = sect; 566 + bio->bi_private = req; 567 + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) 568 + bio->bi_opf |= REQ_FUA; 569 + 570 + for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) { 571 + struct page *p = sg_page(sg); 572 + unsigned int l = sg->length; 573 + unsigned int o = sg->offset; 574 + unsigned int ret; 575 + 576 + ret = bio_add_zone_append_page(bio, p, l, o); 577 + if (ret != sg->length) { 578 + status = NVME_SC_INTERNAL; 579 + goto out_put_bio; 580 + } 581 + total_len += sg->length; 582 + } 583 + 584 + if (total_len != nvmet_rw_data_len(req)) { 585 + status = NVME_SC_INTERNAL | NVME_SC_DNR; 586 + goto out_put_bio; 587 + } 588 + 589 + submit_bio(bio); 590 + return; 591 + 592 + out_put_bio: 593 + nvmet_req_bio_put(req, bio); 594 + out: 595 + nvmet_req_complete(req, status); 596 + } 597 + 598 + u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req) 599 + { 600 + struct nvme_command *cmd = req->cmd; 601 + 602 + switch (cmd->common.opcode) { 603 + case nvme_cmd_zone_append: 604 + req->execute = nvmet_bdev_execute_zone_append; 605 + return 0; 606 + case nvme_cmd_zone_mgmt_recv: 607 + req->execute = nvmet_bdev_execute_zone_mgmt_recv; 608 + return 0; 609 + case nvme_cmd_zone_mgmt_send: 610 + req->execute = nvmet_bdev_execute_zone_mgmt_send; 611 + return 0; 612 + default: 613 + return nvmet_bdev_parse_io_cmd(req); 614 + } 615 + }
+1
fs/pstore/Kconfig
··· 173 173 tristate "Log panic/oops to a block device" 174 174 depends on PSTORE 175 175 depends on BLOCK 176 + depends on BROKEN 176 177 select PSTORE_ZONE 177 178 default n 178 179 help
+5
include/linux/acpi.h
··· 1005 1005 int acpi_subsys_runtime_suspend(struct device *dev); 1006 1006 int acpi_subsys_runtime_resume(struct device *dev); 1007 1007 int acpi_dev_pm_attach(struct device *dev, bool power_on); 1008 + bool acpi_storage_d3(struct device *dev); 1008 1009 #else 1009 1010 static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } 1010 1011 static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } 1011 1012 static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) 1012 1013 { 1013 1014 return 0; 1015 + } 1016 + static inline bool acpi_storage_d3(struct device *dev) 1017 + { 1018 + return false; 1014 1019 } 1015 1020 #endif 1016 1021
+2
include/linux/bio.h
··· 818 818 bio->bi_opf |= REQ_NOWAIT; 819 819 } 820 820 821 + struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp); 822 + 821 823 #endif /* __LINUX_BIO_H */
+10 -2
include/linux/nvme.h
··· 636 636 __u8 type; 637 637 __u8 attributes; 638 638 __u8 rsvd2[14]; 639 - __u64 slba; 640 - __u64 nlb; 639 + __le64 slba; 640 + __le64 nlb; 641 641 __u8 guid[16]; 642 642 __u8 rsvd48[16]; 643 643 }; ··· 944 944 enum { 945 945 NVME_ZRA_ZONE_REPORT = 0, 946 946 NVME_ZRASF_ZONE_REPORT_ALL = 0, 947 + NVME_ZRASF_ZONE_STATE_EMPTY = 0x01, 948 + NVME_ZRASF_ZONE_STATE_IMP_OPEN = 0x02, 949 + NVME_ZRASF_ZONE_STATE_EXP_OPEN = 0x03, 950 + NVME_ZRASF_ZONE_STATE_CLOSED = 0x04, 951 + NVME_ZRASF_ZONE_STATE_READONLY = 0x05, 952 + NVME_ZRASF_ZONE_STATE_FULL = 0x06, 953 + NVME_ZRASF_ZONE_STATE_OFFLINE = 0x07, 947 954 NVME_REPORT_ZONE_PARTIAL = 1, 948 955 }; 949 956 ··· 1511 1504 NVME_SC_NS_WRITE_PROTECTED = 0x20, 1512 1505 NVME_SC_CMD_INTERRUPTED = 0x21, 1513 1506 NVME_SC_TRANSIENT_TR_ERR = 0x22, 1507 + NVME_SC_INVALID_IO_CMD_SET = 0x2C, 1514 1508 1515 1509 NVME_SC_LBA_RANGE = 0x80, 1516 1510 NVME_SC_CAP_EXCEEDED = 0x81,