Merge tag 'block-5.11-2021-01-24' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- NVMe pull request from Christoph:
     - fix a status code in nvmet (Chaitanya Kulkarni)
     - avoid double completions in nvme-rdma/nvme-tcp (Chao Leng)
     - fix the CMB support to cope with NVMe 1.4 controllers (Klaus Jensen)
     - fix PRINFO handling in the passthrough ioctl (Revanth Rajashekar)
     - fix a double DMA unmap in nvme-pci

- lightnvm error path leak fix (Pan)

- MD pull request from Song:
     - Flush request fix (Xiao)

* tag 'block-5.11-2021-01-24' of git://git.kernel.dk/linux-block:
lightnvm: fix memory leak when submit fails
nvme-pci: fix error unwind in nvme_map_data
nvme-pci: refactor nvme_unmap_data
md: Set prev_flush_start and flush_bio in an atomic way
nvmet: set right status on error in id-ns handler
nvme-pci: allow use of cmb on v1.4 controllers
nvme-tcp: avoid request double completion for concurrent nvme_tcp_timeout
nvme-rdma: avoid request double completion for concurrent nvme_rdma_timeout
nvme: check the PRINFO bit before deciding the host buffer length

Changed files (+132 -52):

drivers/lightnvm/core.c (+1 -2)

···
         rqd.ppa_addr = generic_to_dev_addr(dev, ppa);

         ret = nvm_submit_io_sync_raw(dev, &rqd);
+        __free_page(page);
         if (ret)
                 return ret;
-
-        __free_page(page);

         return rqd.error;
 }
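
The fix here is purely about ordering: the scratch page has to be released before the submit result is checked, otherwise the early return on error leaks it. A minimal user-space sketch of the same ordering, using malloc/free and a made-up submit_io() stand-in rather than the lightnvm API:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for a submit routine that may fail. */
static int submit_io(void *buf) { return buf ? 0 : -1; }

static int do_io(void)
{
        void *page = malloc(4096);
        int ret;

        if (!page)
                return -1;

        ret = submit_io(page);
        free(page);         /* released before any early return ...       */
        if (ret)
                return ret; /* ... so the error path cannot leak the page */

        return 0;
}

int main(void)
{
        printf("do_io() = %d\n", do_io());
        return 0;
}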

drivers/md/md.c (+2)

···
          * could wait for this and below md_handle_request could wait for those
          * bios because of suspend check
          */
+        spin_lock_irq(&mddev->lock);
         mddev->prev_flush_start = mddev->start_flush;
         mddev->flush_bio = NULL;
+        spin_unlock_irq(&mddev->lock);
         wake_up(&mddev->sb_wait);

         if (bio->bi_iter.bi_size == 0) {
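
The md change makes the paired update of prev_flush_start and flush_bio atomic with respect to readers by doing both assignments under mddev->lock. A rough user-space analogue of that pattern, with a pthread mutex in place of the kernel spinlock (the struct and function names are hypothetical):

/* build: cc flush_state.c -lpthread */
#include <pthread.h>
#include <stdio.h>

/* Two fields that must always be observed as a consistent pair. */
struct flush_state {
        pthread_mutex_t lock;
        unsigned long prev_flush_start;
        void *flush_bio;
};

static void flush_done(struct flush_state *st, unsigned long start)
{
        /* Update both fields under the lock so no reader sees only one. */
        pthread_mutex_lock(&st->lock);
        st->prev_flush_start = start;
        st->flush_bio = NULL;
        pthread_mutex_unlock(&st->lock);
}

static int flush_pending(struct flush_state *st, unsigned long start)
{
        int pending;

        pthread_mutex_lock(&st->lock);
        pending = (st->flush_bio != NULL) || (st->prev_flush_start < start);
        pthread_mutex_unlock(&st->lock);
        return pending;
}

int main(void)
{
        struct flush_state st = { PTHREAD_MUTEX_INITIALIZER, 0, NULL };

        flush_done(&st, 42);
        printf("pending: %d\n", flush_pending(&st, 42));
        return 0;
}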

drivers/nvme/host/core.c (+15 -2)

···
         }

         length = (io.nblocks + 1) << ns->lba_shift;
-        meta_len = (io.nblocks + 1) * ns->ms;
-        metadata = nvme_to_user_ptr(io.metadata);
+
+        if ((io.control & NVME_RW_PRINFO_PRACT) &&
+            ns->ms == sizeof(struct t10_pi_tuple)) {
+                /*
+                 * Protection information is stripped/inserted by the
+                 * controller.
+                 */
+                if (nvme_to_user_ptr(io.metadata))
+                        return -EINVAL;
+                meta_len = 0;
+                metadata = NULL;
+        } else {
+                meta_len = (io.nblocks + 1) * ns->ms;
+                metadata = nvme_to_user_ptr(io.metadata);
+        }

         if (ns->features & NVME_NS_EXT_LBAS) {
                 length += meta_len;
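
With PRACT set and an 8-byte T10 PI metadata format, the controller inserts and strips the protection information itself, so the passthrough path must not expect a host metadata buffer. A hedged sketch of just that length decision, with simplified stand-ins for the kernel constants:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PRINFO_PRACT (1u << 13)   /* stand-in for NVME_RW_PRINFO_PRACT */
#define T10_PI_TUPLE_SIZE 8       /* sizeof(struct t10_pi_tuple)       */

/* Decide how large a host-side metadata buffer the ioctl should map. */
static size_t meta_buffer_len(uint16_t control, unsigned int ms, unsigned int nblocks)
{
        if ((control & PRINFO_PRACT) && ms == T10_PI_TUPLE_SIZE)
                return 0;                   /* controller inserts/strips PI */
        return (size_t)(nblocks + 1) * ms;  /* host supplies the metadata   */
}

int main(void)
{
        printf("%zu\n", meta_buffer_len(PRINFO_PRACT, 8, 7));  /* 0  */
        printf("%zu\n", meta_buffer_len(0, 8, 7));             /* 64 */
        return 0;
}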

drivers/nvme/host/pci.c (+81 -38)

···
 #include <linux/t10-pi.h>
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/sed-opal.h>
 #include <linux/pci-p2pdma.h>

···
         return true;
 }

-static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
+static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
 {
-        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
         const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
-        dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
+        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+        dma_addr_t dma_addr = iod->first_dma;
         int i;

-        if (iod->dma_len) {
-                dma_unmap_page(dev->dev, dma_addr, iod->dma_len,
-                               rq_dma_dir(req));
-                return;
+        for (i = 0; i < iod->npages; i++) {
+                __le64 *prp_list = nvme_pci_iod_list(req)[i];
+                dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
+
+                dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
+                dma_addr = next_dma_addr;
         }

-        WARN_ON_ONCE(!iod->nents);
+}
+
+static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
+{
+        const int last_sg = SGES_PER_PAGE - 1;
+        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+        dma_addr_t dma_addr = iod->first_dma;
+        int i;
+
+        for (i = 0; i < iod->npages; i++) {
+                struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
+                dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);
+
+                dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
+                dma_addr = next_dma_addr;
+        }
+
+}
+
+static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)
+{
+        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

         if (is_pci_p2pdma_page(sg_page(iod->sg)))
                 pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
                                     rq_dma_dir(req));
         else
                 dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
+}

+static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
+{
+        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

-        if (iod->npages == 0)
-                dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
-                        dma_addr);
-
-        for (i = 0; i < iod->npages; i++) {
-                void *addr = nvme_pci_iod_list(req)[i];
-
-                if (iod->use_sgl) {
-                        struct nvme_sgl_desc *sg_list = addr;
-
-                        next_dma_addr =
-                            le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr);
-                } else {
-                        __le64 *prp_list = addr;
-
-                        next_dma_addr = le64_to_cpu(prp_list[last_prp]);
-                }
-
-                dma_pool_free(dev->prp_page_pool, addr, dma_addr);
-                dma_addr = next_dma_addr;
+        if (iod->dma_len) {
+                dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len,
+                               rq_dma_dir(req));
+                return;
         }

+        WARN_ON_ONCE(!iod->nents);
+
+        nvme_unmap_sg(dev, req);
+        if (iod->npages == 0)
+                dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
+                              iod->first_dma);
+        else if (iod->use_sgl)
+                nvme_free_sgls(dev, req);
+        else
+                nvme_free_prps(dev, req);
         mempool_free(iod->sg, dev->iod_mempool);
 }

···
                         __le64 *old_prp_list = prp_list;
                         prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
                         if (!prp_list)
-                                return BLK_STS_RESOURCE;
+                                goto free_prps;
                         list[iod->npages++] = prp_list;
                         prp_list[0] = old_prp_list[i - 1];
                         old_prp_list[i - 1] = cpu_to_le64(prp_dma);

···
                 dma_addr = sg_dma_address(sg);
                 dma_len = sg_dma_len(sg);
         }
-
 done:
         cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
         cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
-
         return BLK_STS_OK;
-
- bad_sgl:
+free_prps:
+        nvme_free_prps(dev, req);
+        return BLK_STS_RESOURCE;
+bad_sgl:
         WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
                         "Invalid SGL for payload:%d nents:%d\n",
                         blk_rq_payload_bytes(req), iod->nents);

···

         sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
         if (!sg_list)
-                return BLK_STS_RESOURCE;
+                goto free_sgls;

         i = 0;
         nvme_pci_iod_list(req)[iod->npages++] = sg_list;

···
         } while (--entries > 0);

         return BLK_STS_OK;
+free_sgls:
+        nvme_free_sgls(dev, req);
+        return BLK_STS_RESOURCE;
 }

 static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,

···
         sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
         iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
         if (!iod->nents)
-                goto out;
+                goto out_free_sg;

         if (is_pci_p2pdma_page(sg_page(iod->sg)))
                 nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,

···
                 nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
                                              rq_dma_dir(req), DMA_ATTR_NO_WARN);
         if (!nr_mapped)
-                goto out;
+                goto out_free_sg;

         iod->use_sgl = nvme_pci_use_sgls(dev, req);
         if (iod->use_sgl)
                 ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
         else
                 ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
-out:
         if (ret != BLK_STS_OK)
-                nvme_unmap_data(dev, req);
+                goto out_unmap_sg;
+        return BLK_STS_OK;
+
+out_unmap_sg:
+        nvme_unmap_sg(dev, req);
+out_free_sg:
+        mempool_free(iod->sg, dev->iod_mempool);
         return ret;
 }

···
         if (dev->cmb_size)
                 return;

+        if (NVME_CAP_CMBS(dev->ctrl.cap))
+                writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC);
+
         dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
         if (!dev->cmbsz)
                 return;

···

         if (offset > bar_size)
                 return;
+
+        /*
+         * Tell the controller about the host side address mapping the CMB,
+         * and enable CMB decoding for the NVMe 1.4+ scheme:
+         */
+        if (NVME_CAP_CMBS(dev->ctrl.cap)) {
+                hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE |
+                             (pci_bus_address(pdev, bar) + offset),
+                             dev->bar + NVME_REG_CMBMSC);
+        }

         /*
          * Controllers may support a CMB size larger than their BAR,
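
The nvme_map_data()/nvme_pci_setup_*() rework lands on the usual kernel shape: one goto label per acquired resource, unwound in reverse order, so every failure point releases exactly what has been set up so far. A generic user-space sketch of that structure (resources and names are hypothetical; malloc/free stand in for SG allocation and DMA mapping):

#include <stdio.h>
#include <stdlib.h>

struct ctx { void *sg; void *mapping; };

/*
 * Hypothetical multi-step setup: each error label undoes exactly the steps
 * that already succeeded, in reverse order. fail_descriptors forces a
 * failure at the last step so the demo exercises the unwind path.
 */
static int setup(struct ctx *c, int fail_descriptors)
{
        c->sg = malloc(64);                  /* step 1: scatterlist */
        if (!c->sg)
                return -1;

        c->mapping = malloc(64);             /* step 2: stands in for dma_map_sg() */
        if (!c->mapping)
                goto out_free_sg;

        if (fail_descriptors)                /* step 3: PRP/SGL descriptor setup */
                goto out_unmap;

        return 0;                            /* success: caller owns both resources */

out_unmap:
        free(c->mapping);                    /* undo step 2 */
out_free_sg:
        free(c->sg);                         /* undo step 1 */
        return -1;
}

static void teardown(struct ctx *c)
{
        free(c->mapping);
        free(c->sg);
}

int main(void)
{
        struct ctx c;

        printf("forced descriptor failure: %d\n", setup(&c, 1));
        if (setup(&c, 0) == 0) {
                printf("setup ok\n");
                teardown(&c);
        }
        return 0;
}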

drivers/nvme/host/rdma.c (+11 -4)

···
         struct completion cm_done;
         bool pi_support;
         int cq_size;
+        struct mutex queue_lock;
 };

 struct nvme_rdma_ctrl {

···
         int ret;

         queue = &ctrl->queues[idx];
+        mutex_init(&queue->queue_lock);
         queue->ctrl = ctrl;
         if (idx && ctrl->ctrl.max_integrity_segments)
                 queue->pi_support = true;

···
         if (IS_ERR(queue->cm_id)) {
                 dev_info(ctrl->ctrl.device,
                         "failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
-                return PTR_ERR(queue->cm_id);
+                ret = PTR_ERR(queue->cm_id);
+                goto out_destroy_mutex;
         }

         if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)

···
 out_destroy_cm_id:
         rdma_destroy_id(queue->cm_id);
         nvme_rdma_destroy_queue_ib(queue);
+out_destroy_mutex:
+        mutex_destroy(&queue->queue_lock);
         return ret;
 }

···

 static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
 {
-        if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
-                return;
-        __nvme_rdma_stop_queue(queue);
+        mutex_lock(&queue->queue_lock);
+        if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
+                __nvme_rdma_stop_queue(queue);
+        mutex_unlock(&queue->queue_lock);
 }

 static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)

···

         nvme_rdma_destroy_queue_ib(queue);
         rdma_destroy_id(queue->cm_id);
+        mutex_destroy(&queue->queue_lock);
 }

 static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)

drivers/nvme/host/tcp.c (+10 -4)

···
         struct work_struct io_work;
         int io_cpu;

+        struct mutex queue_lock;
         struct mutex send_mutex;
         struct llist_head req_list;
         struct list_head send_list;

···

         sock_release(queue->sock);
         kfree(queue->pdu);
+        mutex_destroy(&queue->queue_lock);
 }

 static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)

···
         struct nvme_tcp_queue *queue = &ctrl->queues[qid];
         int ret, rcv_pdu_size;

+        mutex_init(&queue->queue_lock);
         queue->ctrl = ctrl;
         init_llist_head(&queue->req_list);
         INIT_LIST_HEAD(&queue->send_list);

···
         if (ret) {
                 dev_err(nctrl->device,
                         "failed to create socket: %d\n", ret);
-                return ret;
+                goto err_destroy_mutex;
         }

         /* Single syn retry */

···
 err_sock:
         sock_release(queue->sock);
         queue->sock = NULL;
+err_destroy_mutex:
+        mutex_destroy(&queue->queue_lock);
         return ret;
 }

···
         struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
         struct nvme_tcp_queue *queue = &ctrl->queues[qid];

-        if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
-                return;
-        __nvme_tcp_stop_queue(queue);
+        mutex_lock(&queue->queue_lock);
+        if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
+                __nvme_tcp_stop_queue(queue);
+        mutex_unlock(&queue->queue_lock);
 }

 static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
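
Both transport fixes (nvme-rdma above and nvme-tcp here) serialize queue shutdown behind a new per-queue mutex: whoever clears the LIVE flag does the teardown, and does it with the mutex held, so a timeout handler racing with controller teardown cannot complete the same request twice. A user-space pthread sketch of that idea (names are hypothetical; a bool under the mutex stands in for test_and_clear_bit()):

/* build: cc stop_queue.c -lpthread */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct queue {
        pthread_mutex_t lock;
        bool live;
        int stop_count;         /* how many times teardown actually ran */
};

static void __stop_queue(struct queue *q)
{
        q->stop_count++;        /* in the driver: cancel/complete outstanding requests */
}

/*
 * Safe to call from several contexts (timeout handler, teardown, ...):
 * the mutex plus the live flag guarantee teardown runs exactly once.
 */
static void stop_queue(struct queue *q)
{
        pthread_mutex_lock(&q->lock);
        if (q->live) {
                q->live = false;
                __stop_queue(q);
        }
        pthread_mutex_unlock(&q->lock);
}

static void *racer(void *arg)
{
        stop_queue(arg);
        return NULL;
}

int main(void)
{
        struct queue q = { PTHREAD_MUTEX_INITIALIZER, true, 0 };
        pthread_t a, b;

        pthread_create(&a, NULL, racer, &q);
        pthread_create(&b, NULL, racer, &q);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("teardown ran %d time(s)\n", q.stop_count);  /* always 1 */
        return 0;
}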

drivers/nvme/target/admin-cmd.c (+6 -2)

···

         /* return an all zeroed buffer if we can't find an active namespace */
         ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
-        if (!ns)
+        if (!ns) {
+                status = NVME_SC_INVALID_NS;
                 goto done;
+        }

         nvmet_ns_revalidate(ns);

···
                 id->nsattr |= (1 << 0);
         nvmet_put_namespace(ns);
 done:
-        status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+        if (!status)
+                status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+
         kfree(id);
 out:
         nvmet_req_complete(req, status);

include/linux/nvme.h (+6)

···
         NVME_REG_BPMBL  = 0x0048,       /* Boot Partition Memory Buffer
                                          * Location
                                          */
+        NVME_REG_CMBMSC = 0x0050,       /* Controller Memory Buffer Memory
+                                         * Space Control
+                                         */
         NVME_REG_PMRCAP = 0x0e00,       /* Persistent Memory Capabilities */
         NVME_REG_PMRCTL = 0x0e04,       /* Persistent Memory Region Control */
         NVME_REG_PMRSTS = 0x0e08,       /* Persistent Memory Region Status */

···
 #define NVME_CAP_CSS(cap)       (((cap) >> 37) & 0xff)
 #define NVME_CAP_MPSMIN(cap)    (((cap) >> 48) & 0xf)
 #define NVME_CAP_MPSMAX(cap)    (((cap) >> 52) & 0xf)
+#define NVME_CAP_CMBS(cap)      (((cap) >> 57) & 0x1)

 #define NVME_CMB_BIR(cmbloc)    ((cmbloc) & 0x7)
 #define NVME_CMB_OFST(cmbloc)   (((cmbloc) >> 12) & 0xfffff)

···
         NVME_CSTS_SHST_OCCUR    = 1 << 2,
         NVME_CSTS_SHST_CMPLT    = 2 << 2,
         NVME_CSTS_SHST_MASK     = 3 << 2,
+        NVME_CMBMSC_CRE         = 1 << 0,
+        NVME_CMBMSC_CMSE        = 1 << 1,
 };

 struct nvme_id_power_state {
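
These additions back the NVMe 1.4 CMB handshake used in the pci.c hunk above: CAP.CMBS (bit 57) advertises CMBMSC support, the host sets CRE before reading CMBSZ/CMBLOC, then programs CMSE together with the bus address of the CMB. A small sketch of how that CMBMSC value is composed from the bits defined here (the bus address is a made-up example):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NVME_CAP_CMBS(cap)  (((cap) >> 57) & 0x1)
#define NVME_CMBMSC_CRE     (1ull << 0)
#define NVME_CMBMSC_CMSE    (1ull << 1)

int main(void)
{
        uint64_t cap = 1ull << 57;             /* controller advertises CMBS    */
        uint64_t cmb_bus_addr = 0xfe000000ull; /* made-up host-visible CMB base */

        if (NVME_CAP_CMBS(cap)) {
                /* Step 1: enable CMBSZ/CMBLOC reporting. */
                uint64_t cmbmsc = NVME_CMBMSC_CRE;

                /* Step 2: enable memory-space decode at the chosen address,
                 * mirroring how the pci.c change composes the register value. */
                cmbmsc |= NVME_CMBMSC_CMSE | cmb_bus_addr;
                printf("CMBMSC = 0x%016" PRIx64 "\n", cmbmsc);
        }
        return 0;
}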