Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

- NVMe pull request via Keith:
- Invalid namespace identification error handling (Maurizio, Ewan,
Keith)
- Fabrics keep-alive tuning (Mark)

- Fix for a bad error check regression in bcache (Markus)

- Fix for a performance regression with O_DIRECT (Ming)

- Fix for a flush related deadlock (Ming)

- Make the read-only warn on per-partition (Yu)

* tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux:
nvme-core: check for too small lba shift
blk-mq: don't count completed flush data request as inflight in case of quiesce
block: Document the role of the two attribute groups
block: warn once for each partition in bio_check_ro()
block: move .bd_inode into 1st cacheline of block_device
nvme: check for valid nvme_identify_ns() before using it
nvme-core: fix a memory leak in nvme_ns_info_from_identify()
nvme: fine-tune sending of first keep-alive
bcache: revert replacing IS_ERR_OR_NULL with IS_ERR

+58 -12
+11 -3
block/blk-core.c
··· 501 501 if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { 502 502 if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) 503 503 return; 504 - pr_warn_ratelimited("Trying to write to read-only block-device %pg\n", 505 - bio->bi_bdev); 506 - /* Older lvm-tools actually trigger this */ 504 + 505 + if (bio->bi_bdev->bd_ro_warned) 506 + return; 507 + 508 + bio->bi_bdev->bd_ro_warned = true; 509 + /* 510 + * Use ioctl to set underlying disk of raid/dm to read-only 511 + * will trigger this. 512 + */ 513 + pr_warn("Trying to write to read-only block-device %pg\n", 514 + bio->bi_bdev); 507 515 } 508 516 } 509 517
+13 -1
block/blk-mq.c
··· 1512 1512 } 1513 1513 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); 1514 1514 1515 + static bool blk_is_flush_data_rq(struct request *rq) 1516 + { 1517 + return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq); 1518 + } 1519 + 1515 1520 static bool blk_mq_rq_inflight(struct request *rq, void *priv) 1516 1521 { 1517 1522 /* 1518 1523 * If we find a request that isn't idle we know the queue is busy 1519 1524 * as it's checked in the iter. 1520 1525 * Return false to stop the iteration. 1526 + * 1527 + * In case of queue quiesce, if one flush data request is completed, 1528 + * don't count it as inflight given the flush sequence is suspended, 1529 + * and the original flush data request is invisible to driver, just 1530 + * like other pending requests because of quiesce 1521 1531 */ 1522 - if (blk_mq_request_started(rq)) { 1532 + if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) && 1533 + blk_is_flush_data_rq(rq) && 1534 + blk_mq_request_completed(rq))) { 1523 1535 bool *busy = priv; 1524 1536 1525 1537 *busy = true;
+2
block/blk-sysfs.c
··· 615 615 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec"); 616 616 #endif 617 617 618 + /* Common attributes for bio-based and request-based queues. */ 618 619 static struct attribute *queue_attrs[] = { 619 620 &queue_ra_entry.attr, 620 621 &queue_max_hw_sectors_entry.attr, ··· 660 659 NULL, 661 660 }; 662 661 662 + /* Request-based queue attributes that are not relevant for bio-based queues. */ 663 663 static struct attribute *blk_mq_queue_attrs[] = { 664 664 &queue_requests_entry.attr, 665 665 &elv_iosched_entry.attr,
+1 -1
drivers/md/bcache/btree.c
··· 1522 1522 bch_keylist_free(&keylist); 1523 1523 1524 1524 for (i = 0; i < nodes; i++) 1525 - if (!IS_ERR(new_nodes[i])) { 1525 + if (!IS_ERR_OR_NULL(new_nodes[i])) { 1526 1526 btree_node_free(new_nodes[i]); 1527 1527 rw_unlock(true, new_nodes[i]); 1528 1528 }
+28 -6
drivers/nvme/host/core.c
··· 1192 1192 1193 1193 static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) 1194 1194 { 1195 - queue_delayed_work(nvme_wq, &ctrl->ka_work, 1196 - nvme_keep_alive_work_period(ctrl)); 1195 + unsigned long now = jiffies; 1196 + unsigned long delay = nvme_keep_alive_work_period(ctrl); 1197 + unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay; 1198 + 1199 + if (time_after(now, ka_next_check_tm)) 1200 + delay = 0; 1201 + else 1202 + delay = ka_next_check_tm - now; 1203 + 1204 + queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); 1197 1205 } 1198 1206 1199 1207 static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, ··· 1487 1479 if (id->ncap == 0) { 1488 1480 /* namespace not allocated or attached */ 1489 1481 info->is_removed = true; 1490 - return -ENODEV; 1482 + ret = -ENODEV; 1483 + goto error; 1491 1484 } 1492 1485 1493 1486 info->anagrpid = id->anagrpid; ··· 1506 1497 !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) 1507 1498 memcpy(ids->nguid, id->nguid, sizeof(ids->nguid)); 1508 1499 } 1500 + 1501 + error: 1509 1502 kfree(id); 1510 - return 0; 1503 + return ret; 1511 1504 } 1512 1505 1513 1506 static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl, ··· 1901 1890 1902 1891 /* 1903 1892 * The block layer can't support LBA sizes larger than the page size 1904 - * yet, so catch this early and don't allow block I/O. 1893 + * or smaller than a sector size yet, so catch this early and don't 1894 + * allow block I/O. 
1905 1895 */ 1906 - if (ns->lba_shift > PAGE_SHIFT) { 1896 + if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) { 1907 1897 capacity = 0; 1908 1898 bs = (1 << 9); 1909 1899 } ··· 2041 2029 if (ret) 2042 2030 return ret; 2043 2031 2032 + if (id->ncap == 0) { 2033 + /* namespace not allocated or attached */ 2034 + info->is_removed = true; 2035 + ret = -ENODEV; 2036 + goto error; 2037 + } 2038 + 2044 2039 blk_mq_freeze_queue(ns->disk->queue); 2045 2040 lbaf = nvme_lbaf_index(id->flbas); 2046 2041 ns->lba_shift = id->lbaf[lbaf].ds; ··· 2109 2090 set_bit(NVME_NS_READY, &ns->flags); 2110 2091 ret = 0; 2111 2092 } 2093 + 2094 + error: 2112 2095 kfree(id); 2113 2096 return ret; 2114 2097 } ··· 4492 4471 INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work); 4493 4472 memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); 4494 4473 ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; 4474 + ctrl->ka_last_check_time = jiffies; 4495 4475 4496 4476 BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) > 4497 4477 PAGE_SIZE);
+3 -1
include/linux/blk_types.h
··· 49 49 bool bd_write_holder; 50 50 bool bd_has_submit_bio; 51 51 dev_t bd_dev; 52 + struct inode *bd_inode; /* will die */ 53 + 52 54 atomic_t bd_openers; 53 55 spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ 54 - struct inode * bd_inode; /* will die */ 55 56 void * bd_claiming; 56 57 void * bd_holder; 57 58 const struct blk_holder_ops *bd_holder_ops; ··· 70 69 #ifdef CONFIG_FAIL_MAKE_REQUEST 71 70 bool bd_make_it_fail; 72 71 #endif 72 + bool bd_ro_warned; 73 73 /* 74 74 * keep this out-of-line as it's both big and not needed in the fast 75 75 * path