Merge branch 'nvme-5.2-rc2' of git://git.infradead.org/nvme into for-linus

Pull NVMe changes from Keith.

* 'nvme-5.2-rc2' of git://git.infradead.org/nvme:
  nvme-pci: use blk-mq mapping for unmanaged irqs
  nvme: update MAINTAINERS
  nvme: copy MTFA field from identify controller
  nvme: fix memory leak for power latency tolerance
  nvme: release namespace SRCU protection before performing controller ioctls
  nvme: merge nvme_ns_ioctl into nvme_ioctl
  nvme: remove the ifdef around nvme_nvm_ioctl
  nvme: fix srcu locking on error return in nvme_get_ns_from_disk
  nvme: Fix known effects
  nvme-pci: Sync queues on reset
  nvme-pci: Unblock reset_work on IO failure
  nvme-pci: Don't disable on timeout in reset state
  nvme-pci: Fix controller freeze wait disabling

 MAINTAINERS              |  2 +-
 drivers/nvme/host/core.c | 91 ++++++++++++++++++++++++++++++++--------------
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/pci.c  | 27 +++++++-------
 4 files changed, 78 insertions(+), 43 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11227,7 +11227,7 @@
 F: drivers/video/fbdev/nvidia/
 
 NVM EXPRESS DRIVER
-M: Keith Busch <keith.busch@intel.com>
+M: Keith Busch <kbusch@kernel.org>
 M: Jens Axboe <axboe@fb.com>
 M: Christoph Hellwig <hch@lst.de>
 M: Sagi Grimberg <sagi@grimberg.me>
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1257,9 +1257,9 @@
                 return 0;
         }
 
-        effects |= nvme_known_admin_effects(opcode);
         if (ctrl->effects)
                 effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+        effects |= nvme_known_admin_effects(opcode);
 
         /*
          * For simplicity, IO to all namespaces is quiesced even if the command
@@ -1361,9 +1361,14 @@
 {
 #ifdef CONFIG_NVME_MULTIPATH
         if (disk->fops == &nvme_ns_head_ops) {
+                struct nvme_ns *ns;
+
                 *head = disk->private_data;
                 *srcu_idx = srcu_read_lock(&(*head)->srcu);
-                return nvme_find_path(*head);
+                ns = nvme_find_path(*head);
+                if (!ns)
+                        srcu_read_unlock(&(*head)->srcu, *srcu_idx);
+                return ns;
         }
 #endif
         *head = NULL;
@@ -1382,42 +1387,56 @@
                 srcu_read_unlock(&head->srcu, idx);
 }
 
-static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg)
-{
-        switch (cmd) {
-        case NVME_IOCTL_ID:
-                force_successful_syscall_return();
-                return ns->head->ns_id;
-        case NVME_IOCTL_ADMIN_CMD:
-                return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
-        case NVME_IOCTL_IO_CMD:
-                return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
-        case NVME_IOCTL_SUBMIT_IO:
-                return nvme_submit_io(ns, (void __user *)arg);
-        default:
-#ifdef CONFIG_NVM
-                if (ns->ndev)
-                        return nvme_nvm_ioctl(ns, cmd, arg);
-#endif
-                if (is_sed_ioctl(cmd))
-                        return sed_ioctl(ns->ctrl->opal_dev, cmd,
-                                        (void __user *) arg);
-                return -ENOTTY;
-        }
-}
-
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
                 unsigned int cmd, unsigned long arg)
 {
         struct nvme_ns_head *head = NULL;
+        void __user *argp = (void __user *)arg;
         struct nvme_ns *ns;
         int srcu_idx, ret;
 
         ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
         if (unlikely(!ns))
-                ret = -EWOULDBLOCK;
-        else
-                ret = nvme_ns_ioctl(ns, cmd, arg);
+                return -EWOULDBLOCK;
+
+        /*
+         * Handle ioctls that apply to the controller instead of the namespace
+         * seperately and drop the ns SRCU reference early. This avoids a
+         * deadlock when deleting namespaces using the passthrough interface.
+         */
+        if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
+                struct nvme_ctrl *ctrl = ns->ctrl;
+
+                nvme_get_ctrl(ns->ctrl);
+                nvme_put_ns_from_disk(head, srcu_idx);
+
+                if (cmd == NVME_IOCTL_ADMIN_CMD)
+                        ret = nvme_user_cmd(ctrl, NULL, argp);
+                else
+                        ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+
+                nvme_put_ctrl(ctrl);
+                return ret;
+        }
+
+        switch (cmd) {
+        case NVME_IOCTL_ID:
+                force_successful_syscall_return();
+                ret = ns->head->ns_id;
+                break;
+        case NVME_IOCTL_IO_CMD:
+                ret = nvme_user_cmd(ns->ctrl, ns, argp);
+                break;
+        case NVME_IOCTL_SUBMIT_IO:
+                ret = nvme_submit_io(ns, argp);
+                break;
+        default:
+                if (ns->ndev)
+                        ret = nvme_nvm_ioctl(ns, cmd, arg);
+                else
+                        ret = -ENOTTY;
+        }
+
         nvme_put_ns_from_disk(head, srcu_idx);
         return ret;
 }
@@ -2576,6 +2595,7 @@
 
         ctrl->oacs = le16_to_cpu(id->oacs);
         ctrl->oncs = le16_to_cpu(id->oncs);
+        ctrl->mtfa = le16_to_cpu(id->mtfa);
         ctrl->oaes = le32_to_cpu(id->oaes);
         atomic_set(&ctrl->abort_limit, id->acl + 1);
         ctrl->vwc = id->vwc;
@@ -3701,6 +3721,7 @@
 
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
 {
+        dev_pm_qos_hide_latency_tolerance(ctrl->device);
         cdev_device_del(&ctrl->cdev, ctrl->device);
 }
 EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
@@ -3900,6 +3921,18 @@
         up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
+
+
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+        struct nvme_ns *ns;
+
+        down_read(&ctrl->namespaces_rwsem);
+        list_for_each_entry(ns, &ctrl->namespaces, list)
+                blk_sync_queue(ns->queue);
+        up_read(&ctrl->namespaces_rwsem);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
 
 /*
  * Check we didn't inadvertently grow the command structure sizes:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -441,6 +441,7 @@
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_sync_queues(struct nvme_ctrl *ctrl);
 void nvme_unfreeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -464,7 +464,7 @@
                  * affinity), so use the regular blk-mq cpu mapping
                  */
                 map->queue_offset = qoff;
-                if (i != HCTX_TYPE_POLL)
+                if (i != HCTX_TYPE_POLL && offset)
                         blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
                 else
                         blk_mq_map_queues(map);
@@ -1257,7 +1257,6 @@
         struct nvme_dev *dev = nvmeq->dev;
         struct request *abort_req;
         struct nvme_command cmd;
-        bool shutdown = false;
         u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
         /* If PCI error recovery process is happening, we cannot reset or
@@ -1293,17 +1292,18 @@
          * shutdown, so we return BLK_EH_DONE.
          */
         switch (dev->ctrl.state) {
-        case NVME_CTRL_DELETING:
-                shutdown = true;
-                /* fall through */
         case NVME_CTRL_CONNECTING:
-        case NVME_CTRL_RESETTING:
+                nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+                /* fall through */
+        case NVME_CTRL_DELETING:
                 dev_warn_ratelimited(dev->ctrl.device,
                          "I/O %d QID %d timeout, disable controller\n",
                          req->tag, nvmeq->qid);
-                nvme_dev_disable(dev, shutdown);
+                nvme_dev_disable(dev, true);
                 nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                 return BLK_EH_DONE;
+        case NVME_CTRL_RESETTING:
+                return BLK_EH_RESET_TIMER;
         default:
                 break;
         }
@@ -2376,7 +2376,7 @@
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
-        bool dead = true;
+        bool dead = true, freeze = false;
         struct pci_dev *pdev = to_pci_dev(dev->dev);
 
         mutex_lock(&dev->shutdown_lock);
@@ -2384,8 +2384,10 @@
                 u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
                 if (dev->ctrl.state == NVME_CTRL_LIVE ||
-                    dev->ctrl.state == NVME_CTRL_RESETTING)
+                    dev->ctrl.state == NVME_CTRL_RESETTING) {
+                        freeze = true;
                         nvme_start_freeze(&dev->ctrl);
+                }
                 dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
                         pdev->error_state != pci_channel_io_normal);
         }
@@ -2396,9 +2398,7 @@
          * Give the controller a chance to complete all entered requests if
          * doing a safe shutdown.
          */
-        if (!dead) {
-                if (shutdown)
-                        nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
-        }
+        if (!dead && shutdown && freeze)
+                nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
 
         nvme_stop_queues(&dev->ctrl);
@@ -2492,6 +2492,7 @@
          */
         if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
                 nvme_dev_disable(dev, false);
+        nvme_sync_queues(&dev->ctrl);
 
         mutex_lock(&dev->shutdown_lock);
         result = nvme_pci_enable(dev);