Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'block-6.15-20250515' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

- NVMe pull request via Christoph:
- fixes for atomic writes (Alan Adamson)
- fixes for polled CQs in nvmet-epf (Damien Le Moal)
- fix for polled CQs in nvme-pci (Keith Busch)
- fix compile on odd configs that need to be forced to inline
(Kees Cook)
- one more quirk (Ilya Guterman)

- Fix for missing allocation of an integrity buffer for some cases

- Fix for a regression with ublk command cancelation

* tag 'block-6.15-20250515' of git://git.kernel.dk/linux:
ublk: fix dead loop when canceling io command
nvme-pci: add NVME_QUIRK_NO_DEEPEST_PS quirk for SOLIDIGM P44 Pro
nvme: all namespaces in a subsystem must adhere to a common atomic write size
nvme: multipath: enable BLK_FEAT_ATOMIC_WRITES for multipathing
nvmet: pci-epf: remove NVMET_PCI_EPF_Q_IS_SQ
nvmet: pci-epf: improve debug message
nvmet: pci-epf: cleanup nvmet_pci_epf_raise_irq()
nvmet: pci-epf: do not fall back to using INTX if not supported
nvmet: pci-epf: clear completion queue IRQ flag on delete
nvme-pci: acquire cq_poll_lock in nvme_poll_irqdisable
nvme-pci: make nvme_pci_npages_prp() __always_inline
block: always allocate integrity buffer when required

+107 -38
+47 -15
block/bio-integrity-auto.c
··· 9 9 * not aware of PI. 10 10 */ 11 11 #include <linux/blk-integrity.h> 12 + #include <linux/t10-pi.h> 12 13 #include <linux/workqueue.h> 13 14 #include "blk.h" 14 15 ··· 44 43 bio_endio(bio); 45 44 } 46 45 46 + #define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) 47 + static bool bip_should_check(struct bio_integrity_payload *bip) 48 + { 49 + return bip->bip_flags & BIP_CHECK_FLAGS; 50 + } 51 + 52 + static bool bi_offload_capable(struct blk_integrity *bi) 53 + { 54 + switch (bi->csum_type) { 55 + case BLK_INTEGRITY_CSUM_CRC64: 56 + return bi->tuple_size == sizeof(struct crc64_pi_tuple); 57 + case BLK_INTEGRITY_CSUM_CRC: 58 + case BLK_INTEGRITY_CSUM_IP: 59 + return bi->tuple_size == sizeof(struct t10_pi_tuple); 60 + default: 61 + pr_warn_once("%s: unknown integrity checksum type:%d\n", 62 + __func__, bi->csum_type); 63 + fallthrough; 64 + case BLK_INTEGRITY_CSUM_NONE: 65 + return false; 66 + } 67 + } 68 + 47 69 /** 48 70 * __bio_integrity_endio - Integrity I/O completion function 49 71 * @bio: Protected bio ··· 78 54 */ 79 55 bool __bio_integrity_endio(struct bio *bio) 80 56 { 81 - struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 82 57 struct bio_integrity_payload *bip = bio_integrity(bio); 83 58 struct bio_integrity_data *bid = 84 59 container_of(bip, struct bio_integrity_data, bip); 85 60 86 - if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) { 61 + if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && 62 + bip_should_check(bip)) { 87 63 INIT_WORK(&bid->work, bio_integrity_verify_fn); 88 64 queue_work(kintegrityd_wq, &bid->work); 89 65 return false; ··· 108 84 { 109 85 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 110 86 struct bio_integrity_data *bid; 87 + bool set_flags = true; 111 88 gfp_t gfp = GFP_NOIO; 112 89 unsigned int len; 113 90 void *buf; ··· 125 100 126 101 switch (bio_op(bio)) { 127 102 case REQ_OP_READ: 128 - if (bi->flags & BLK_INTEGRITY_NOVERIFY) 129 - return true; 103 + if (bi->flags & BLK_INTEGRITY_NOVERIFY) { 104 + if (bi_offload_capable(bi)) 105 + return true; 106 + set_flags = false; 107 + } 130 108 break; 131 109 case REQ_OP_WRITE: 132 - if (bi->flags & BLK_INTEGRITY_NOGENERATE) 133 - return true; 134 - 135 110 /* 136 111 * Zero the memory allocated to not leak uninitialized kernel 137 112 * memory to disk for non-integrity metadata where nothing else 138 113 * initializes the memory. 139 114 */ 140 - if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) 115 + if (bi->flags & BLK_INTEGRITY_NOGENERATE) { 116 + if (bi_offload_capable(bi)) 117 + return true; 118 + set_flags = false; 119 + gfp |= __GFP_ZERO; 120 + } else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) 141 121 gfp |= __GFP_ZERO; 142 122 break; 143 123 default: ··· 167 137 bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY; 168 138 bip_set_seed(&bid->bip, bio->bi_iter.bi_sector); 169 139 170 - if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) 171 - bid->bip.bip_flags |= BIP_IP_CHECKSUM; 172 - if (bi->csum_type) 173 - bid->bip.bip_flags |= BIP_CHECK_GUARD; 174 - if (bi->flags & BLK_INTEGRITY_REF_TAG) 175 - bid->bip.bip_flags |= BIP_CHECK_REFTAG; 140 + if (set_flags) { 141 + if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) 142 + bid->bip.bip_flags |= BIP_IP_CHECKSUM; 143 + if (bi->csum_type) 144 + bid->bip.bip_flags |= BIP_CHECK_GUARD; 145 + if (bi->flags & BLK_INTEGRITY_REF_TAG) 146 + bid->bip.bip_flags |= BIP_CHECK_REFTAG; 147 + } 176 148 177 149 if (bio_integrity_add_page(bio, virt_to_page(buf), len, 178 150 offset_in_page(buf)) < len) 179 151 goto err_end_io; 180 152 181 153 /* Auto-generate integrity metadata if this is a write */ 182 - if (bio_data_dir(bio) == WRITE) 154 + if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip)) 183 155 blk_integrity_generate(bio); 184 156 else 185 157 bid->saved_bio_iter = bio->bi_iter;
+1 -1
drivers/block/ublk_drv.c
··· 1708 1708 * that ublk_dispatch_req() is always called 1709 1709 */ 1710 1710 req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); 1711 - if (req && blk_mq_request_started(req)) 1711 + if (req && blk_mq_request_started(req) && req->tag == tag) 1712 1712 return; 1713 1713 1714 1714 spin_lock(&ubq->cancel_lock);
+27 -3
drivers/nvme/host/core.c
··· 2059 2059 if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) 2060 2060 atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; 2061 2061 else 2062 - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; 2062 + atomic_bs = (1 + ns->ctrl->awupf) * bs; 2063 + 2064 + /* 2065 + * Set subsystem atomic bs. 2066 + */ 2067 + if (ns->ctrl->subsys->atomic_bs) { 2068 + if (atomic_bs != ns->ctrl->subsys->atomic_bs) { 2069 + dev_err_ratelimited(ns->ctrl->device, 2070 + "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", 2071 + ns->disk ? ns->disk->disk_name : "?", 2072 + ns->ctrl->subsys->atomic_bs, 2073 + atomic_bs); 2074 + } 2075 + } else 2076 + ns->ctrl->subsys->atomic_bs = atomic_bs; 2063 2077 2064 2078 nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); 2065 2079 } ··· 2215 2201 nvme_set_chunk_sectors(ns, id, &lim); 2216 2202 if (!nvme_update_disk_info(ns, id, &lim)) 2217 2203 capacity = 0; 2204 + 2205 + /* 2206 + * Validate the max atomic write size fits within the subsystem's 2207 + * atomic write capabilities. 2208 + */ 2209 + if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { 2210 + blk_mq_unfreeze_queue(ns->disk->queue, memflags); 2211 + ret = -ENXIO; 2212 + goto out; 2213 + } 2214 + 2218 2215 nvme_config_discard(ns, &lim); 2219 2216 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && 2220 2217 ns->head->ids.csi == NVME_CSI_ZNS) ··· 3056 3031 kfree(subsys); 3057 3032 return -EINVAL; 3058 3033 } 3059 - subsys->awupf = le16_to_cpu(id->awupf); 3060 3034 nvme_mpath_default_iopolicy(subsys); 3061 3035 3062 3036 subsys->dev.class = &nvme_subsys_class; ··· 3465 3441 dev_pm_qos_expose_latency_tolerance(ctrl->device); 3466 3442 else if (!ctrl->apst_enabled && prev_apst_enabled) 3467 3443 dev_pm_qos_hide_latency_tolerance(ctrl->device); 3468 - 3444 + ctrl->awupf = le16_to_cpu(id->awupf); 3469 3445 out_free: 3470 3446 kfree(id); 3471 3447 return ret;
+2 -1
drivers/nvme/host/multipath.c
··· 638 638 639 639 blk_set_stacking_limits(&lim); 640 640 lim.dma_alignment = 3; 641 - lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL; 641 + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | 642 + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES; 642 643 if (head->ids.csi == NVME_CSI_ZNS) 643 644 lim.features |= BLK_FEAT_ZONED; 644 645
+2 -1
drivers/nvme/host/nvme.h
··· 410 410 411 411 enum nvme_ctrl_type cntrltype; 412 412 enum nvme_dctype dctype; 413 + u16 awupf; /* 0's based value. */ 413 414 }; 414 415 415 416 static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) ··· 443 442 u8 cmic; 444 443 enum nvme_subsys_type subtype; 445 444 u16 vendor_id; 446 - u16 awupf; /* 0's based awupf value. */ 447 445 struct ida ns_ida; 448 446 #ifdef CONFIG_NVME_MULTIPATH 449 447 enum nvme_iopolicy iopolicy; 450 448 #endif 449 + u32 atomic_bs; 451 450 }; 452 451 453 452 /*
+5 -1
drivers/nvme/host/pci.c
··· 390 390 * as it only leads to a small amount of wasted memory for the lifetime of 391 391 * the I/O. 392 392 */ 393 - static int nvme_pci_npages_prp(void) 393 + static __always_inline int nvme_pci_npages_prp(void) 394 394 { 395 395 unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; 396 396 unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); ··· 1202 1202 WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); 1203 1203 1204 1204 disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); 1205 + spin_lock(&nvmeq->cq_poll_lock); 1205 1206 nvme_poll_cq(nvmeq, NULL); 1207 + spin_unlock(&nvmeq->cq_poll_lock); 1206 1208 enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); 1207 1209 } 1208 1210 ··· 3738 3736 { PCI_DEVICE(0x1e49, 0x0021), /* ZHITAI TiPro5000 NVMe SSD */ 3739 3737 .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, 3740 3738 { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */ 3739 + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, 3740 + { PCI_DEVICE(0x025e, 0xf1ac), /* SOLIDIGM P44 pro SSDPFKKW020X7 */ 3741 3741 .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, 3742 3742 { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ 3743 3743 .driver_data = NVME_QUIRK_BOGUS_NID, },
+23 -16
drivers/nvme/target/pci-epf.c
··· 62 62 #define NVMET_PCI_EPF_CQ_RETRY_INTERVAL msecs_to_jiffies(1) 63 63 64 64 enum nvmet_pci_epf_queue_flags { 65 - NVMET_PCI_EPF_Q_IS_SQ = 0, /* The queue is a submission queue */ 66 - NVMET_PCI_EPF_Q_LIVE, /* The queue is live */ 65 + NVMET_PCI_EPF_Q_LIVE = 0, /* The queue is live */ 67 66 NVMET_PCI_EPF_Q_IRQ_ENABLED, /* IRQ is enabled for this queue */ 68 67 }; 69 68 ··· 595 596 struct nvmet_pci_epf_irq_vector *iv = cq->iv; 596 597 bool ret; 597 598 598 - if (!test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) 599 - return false; 600 - 601 599 /* IRQ coalescing for the admin queue is not allowed. */ 602 600 if (!cq->qid) 603 601 return true; ··· 621 625 struct pci_epf *epf = nvme_epf->epf; 622 626 int ret = 0; 623 627 624 - if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) 628 + if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) || 629 + !test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) 625 630 return; 626 631 627 632 mutex_lock(&ctrl->irq_lock); ··· 633 636 switch (nvme_epf->irq_type) { 634 637 case PCI_IRQ_MSIX: 635 638 case PCI_IRQ_MSI: 639 + /* 640 + * If we fail to raise an MSI or MSI-X interrupt, it is likely 641 + * because the host is using legacy INTX IRQs (e.g. BIOS, 642 + * grub), but we can fallback to the INTX type only if the 643 + * endpoint controller supports this type. 644 + */ 636 645 ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, 637 646 nvme_epf->irq_type, cq->vector + 1); 638 - if (!ret) 647 + if (!ret || !nvme_epf->epc_features->intx_capable) 639 648 break; 640 - /* 641 - * If we got an error, it is likely because the host is using 642 - * legacy IRQs (e.g. BIOS, grub). 643 - */ 644 649 fallthrough; 645 650 case PCI_IRQ_INTX: 646 651 ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, ··· 655 656 } 656 657 657 658 if (ret) 658 - dev_err(ctrl->dev, "Failed to raise IRQ (err=%d)\n", ret); 659 + dev_err_ratelimited(ctrl->dev, 660 + "CQ[%u]: Failed to raise IRQ (err=%d)\n", 661 + cq->qid, ret); 659 662 660 663 unlock: 661 664 mutex_unlock(&ctrl->irq_lock); ··· 1320 1319 1321 1320 set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); 1322 1321 1323 - dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", 1324 - cqid, qsize, cq->qes, cq->vector); 1322 + if (test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) 1323 + dev_dbg(ctrl->dev, 1324 + "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", 1325 + cqid, qsize, cq->qes, cq->vector); 1326 + else 1327 + dev_dbg(ctrl->dev, 1328 + "CQ[%u]: %u entries of %zu B, IRQ disabled\n", 1329 + cqid, qsize, cq->qes); 1325 1330 1326 1331 return NVME_SC_SUCCESS; 1327 1332 ··· 1351 1344 1352 1345 cancel_delayed_work_sync(&cq->work); 1353 1346 nvmet_pci_epf_drain_queue(cq); 1354 - nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); 1347 + if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) 1348 + nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); 1355 1349 nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map); 1356 1350 1357 1351 return NVME_SC_SUCCESS; ··· 1541 1533 1542 1534 if (sq) { 1543 1535 queue = &ctrl->sq[qid]; 1544 - set_bit(NVMET_PCI_EPF_Q_IS_SQ, &queue->flags); 1545 1536 } else { 1546 1537 queue = &ctrl->cq[qid]; 1547 1538 INIT_DELAYED_WORK(&queue->work, nvmet_pci_epf_cq_work);