Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

null_blk: add zone support

Adds support for exposing a null_blk device through the zone device
interface.

The interface is managed with the parameters zoned and zone_size.
If zoned is set, the null_blk instance registers as a zoned block
device. The zone_size parameter defines how big each zone will be.

Signed-off-by: Matias Bjørling <matias.bjorling@wdc.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Matias Bjørling and committed by
Jens Axboe
ca4b2a01 6dad38d3

+234 -3
+7
Documentation/block/null_blk.txt
··· 85 85 0: Tag set is not shared. 86 86 1: Tag set shared between devices for blk-mq. Only makes sense with 87 87 nr_devices > 1, otherwise there's no tag set to share. 88 + 89 + zoned=[0/1]: Default: 0 90 + 0: Block device is exposed as a random-access block device. 91 + 1: Block device is exposed as a host-managed zoned block device. 92 + 93 + zone_size=[MB]: Default: 256 94 + Per zone size when exposed as a zoned block device. Must be a power of two.
+4 -1
drivers/block/Makefile
··· 36 36 obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ 37 37 38 38 obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ 39 - obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o 40 39 obj-$(CONFIG_ZRAM) += zram/ 40 + 41 + obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk_mod.o 42 + null_blk_mod-objs := null_blk.o 43 + null_blk_mod-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o 41 44 42 45 skd-y := skd_main.o 43 46 swim_mod-y := swim.o swim_asm.o
+46 -2
drivers/block/null_blk.c
··· 180 180 module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444); 181 181 MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); 182 182 183 + static bool g_zoned; 184 + module_param_named(zoned, g_zoned, bool, S_IRUGO); 185 + MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false"); 186 + 187 + static unsigned long g_zone_size = 256; 188 + module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); 189 + MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); 190 + 183 191 static struct nullb_device *null_alloc_dev(void); 184 192 static void null_free_dev(struct nullb_device *dev); 185 193 static void null_del_dev(struct nullb *nullb); ··· 291 283 NULLB_DEVICE_ATTR(discard, bool); 292 284 NULLB_DEVICE_ATTR(mbps, uint); 293 285 NULLB_DEVICE_ATTR(cache_size, ulong); 286 + NULLB_DEVICE_ATTR(zoned, bool); 287 + NULLB_DEVICE_ATTR(zone_size, ulong); 294 288 295 289 static ssize_t nullb_device_power_show(struct config_item *item, char *page) 296 290 { ··· 405 395 &nullb_device_attr_mbps, 406 396 &nullb_device_attr_cache_size, 407 397 &nullb_device_attr_badblocks, 398 + &nullb_device_attr_zoned, 399 + &nullb_device_attr_zone_size, 408 400 NULL, 409 401 }; 410 402 ··· 459 447 460 448 static ssize_t memb_group_features_show(struct config_item *item, char *page) 461 449 { 462 - return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks\n"); 450 + return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size\n"); 463 451 } 464 452 465 453 CONFIGFS_ATTR_RO(memb_group_, features); ··· 518 506 dev->hw_queue_depth = g_hw_queue_depth; 519 507 dev->blocking = g_blocking; 520 508 dev->use_per_node_hctx = g_use_per_node_hctx; 509 + dev->zoned = g_zoned; 510 + dev->zone_size = g_zone_size; 521 511 return dev; 522 512 } 523 513 ··· 528 514 if (!dev) 529 515 return; 530 516 517 + null_zone_exit(dev); 531 518 badblocks_exit(&dev->badblocks); 532 519 kfree(dev); 533 520 } ··· 1161 1146 struct nullb *nullb = dev->nullb; 1162 1147 int err = 0; 1163 1148 1149 + if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) { 1150 + cmd->error = null_zone_report(nullb, cmd); 1151 + goto out; 1152 + } 1153 + 1164 1154 if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { 1165 1155 struct request *rq = cmd->rq; 1166 1156 ··· 1230 1210 } 1231 1211 } 1232 1212 cmd->error = errno_to_blk_status(err); 1213 + 1214 + if (!cmd->error && dev->zoned) { 1215 + if (req_op(cmd->rq) == REQ_OP_WRITE) 1216 + null_zone_write(cmd); 1217 + else if (req_op(cmd->rq) == REQ_OP_ZONE_RESET) 1218 + null_zone_reset(cmd); 1219 + } 1233 1220 out: 1234 1221 /* Complete IO by inline, softirq or timer */ 1235 1222 switch (dev->irqmode) { ··· 1764 1737 blk_queue_flush_queueable(nullb->q, true); 1765 1738 } 1766 1739 1740 + if (dev->zoned) { 1741 + rv = null_zone_init(dev); 1742 + if (rv) 1743 + goto out_cleanup_blk_queue; 1744 + 1745 + blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects); 1746 + nullb->q->limits.zoned = BLK_ZONED_HM; 1747 + } 1748 + 1767 1749 nullb->q->queuedata = nullb; 1768 1750 blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); 1769 1751 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); ··· 1791 1755 1792 1756 rv = null_gendisk_register(nullb); 1793 1757 if (rv) 1794 - goto out_cleanup_blk_queue; 1758 + goto out_cleanup_zone; 1795 1759 1796 1760 mutex_lock(&lock); 1797 1761 list_add_tail(&nullb->list, &nullb_list); 1798 1762 mutex_unlock(&lock); 1799 1763 1800 1764 return 0; 1765 + out_cleanup_zone: 1766 + if (dev->zoned) 1767 + null_zone_exit(dev); 1801 1768 out_cleanup_blk_queue: 1802 1769 blk_cleanup_queue(nullb->q); 1803 1770 out_cleanup_tags: ··· 1825 1786 pr_warn("null_blk: invalid block size\n"); 1826 1787 pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); 1827 1788 g_bs = PAGE_SIZE; 1789 + } 1790 + 1791 + if (!is_power_of_2(g_zone_size)) { 1792 + pr_err("null_blk: zone_size must be power-of-two\n"); 1793 + return -EINVAL; 1828 1794 } 1829 1795 1830 1796 if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+28
drivers/block/null_blk.h
··· 41 41 unsigned int curr_cache; 42 42 struct badblocks badblocks; 43 43 44 + unsigned int nr_zones; 45 + struct blk_zone *zones; 46 + sector_t zone_size_sects; 47 + 44 48 unsigned long size; /* device size in MB */ 45 49 unsigned long completion_nsec; /* time in ns to complete a request */ 46 50 unsigned long cache_size; /* disk cache size in MB */ 51 + unsigned long zone_size; /* zone size in MB if device is zoned */ 47 52 unsigned int submit_queues; /* number of submission queues */ 48 53 unsigned int home_node; /* home node for the device */ 49 54 unsigned int queue_mode; /* block interface */ ··· 62 57 bool power; /* power on/off the device */ 63 58 bool memory_backed; /* if data is stored in memory */ 64 59 bool discard; /* if support discard */ 60 + bool zoned; /* if device is zoned */ 65 61 }; 66 62 67 63 struct nullb { ··· 83 77 unsigned int nr_queues; 84 78 char disk_name[DISK_NAME_LEN]; 85 79 }; 80 + 81 + #ifdef CONFIG_BLK_DEV_ZONED 82 + int null_zone_init(struct nullb_device *dev); 83 + void null_zone_exit(struct nullb_device *dev); 84 + blk_status_t null_zone_report(struct nullb *nullb, 85 + struct nullb_cmd *cmd); 86 + void null_zone_write(struct nullb_cmd *cmd); 87 + void null_zone_reset(struct nullb_cmd *cmd); 88 + #else 89 + static inline int null_zone_init(struct nullb_device *dev) 90 + { 91 + return -EINVAL; 92 + } 93 + static inline void null_zone_exit(struct nullb_device *dev) {} 94 + static inline blk_status_t null_zone_report(struct nullb *nullb, 95 + struct nullb_cmd *cmd) 96 + { 97 + return BLK_STS_NOTSUPP; 98 + } 99 + static inline void null_zone_write(struct nullb_cmd *cmd) {} 100 + static inline void null_zone_reset(struct nullb_cmd *cmd) {} 101 + #endif /* CONFIG_BLK_DEV_ZONED */ 86 102 #endif /* __NULL_BLK_H */
+149
drivers/block/null_blk_zoned.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/vmalloc.h> 3 + #include "null_blk.h" 4 + 5 + /* zone_size in MBs to sectors. */ 6 + #define ZONE_SIZE_SHIFT 11 7 + 8 + static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) 9 + { 10 + return sect >> ilog2(dev->zone_size_sects); 11 + } 12 + 13 + int null_zone_init(struct nullb_device *dev) 14 + { 15 + sector_t dev_size = (sector_t)dev->size * 1024 * 1024; 16 + sector_t sector = 0; 17 + unsigned int i; 18 + 19 + if (!is_power_of_2(dev->zone_size)) { 20 + pr_err("null_blk: zone_size must be power-of-two\n"); 21 + return -EINVAL; 22 + } 23 + 24 + dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT; 25 + dev->nr_zones = dev_size >> 26 + (SECTOR_SHIFT + ilog2(dev->zone_size_sects)); 27 + dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone), 28 + GFP_KERNEL | __GFP_ZERO); 29 + if (!dev->zones) 30 + return -ENOMEM; 31 + 32 + for (i = 0; i < dev->nr_zones; i++) { 33 + struct blk_zone *zone = &dev->zones[i]; 34 + 35 + zone->start = zone->wp = sector; 36 + zone->len = dev->zone_size_sects; 37 + zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; 38 + zone->cond = BLK_ZONE_COND_EMPTY; 39 + 40 + sector += dev->zone_size_sects; 41 + } 42 + 43 + return 0; 44 + } 45 + 46 + void null_zone_exit(struct nullb_device *dev) 47 + { 48 + kvfree(dev->zones); 49 + } 50 + 51 + static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq, 52 + unsigned int zno, unsigned int nr_zones) 53 + { 54 + struct blk_zone_report_hdr *hdr = NULL; 55 + struct bio_vec bvec; 56 + struct bvec_iter iter; 57 + void *addr; 58 + unsigned int zones_to_cpy; 59 + 60 + bio_for_each_segment(bvec, rq->bio, iter) { 61 + addr = kmap_atomic(bvec.bv_page); 62 + 63 + zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone); 64 + 65 + if (!hdr) { 66 + hdr = (struct blk_zone_report_hdr *)addr; 67 + hdr->nr_zones = nr_zones; 68 + zones_to_cpy--; 69 + addr += sizeof(struct blk_zone_report_hdr); 70 + } 71 + 72 + zones_to_cpy = min_t(unsigned int, zones_to_cpy, nr_zones); 73 + 74 + memcpy(addr, &dev->zones[zno], 75 + zones_to_cpy * sizeof(struct blk_zone)); 76 + 77 + kunmap_atomic(addr); 78 + 79 + nr_zones -= zones_to_cpy; 80 + zno += zones_to_cpy; 81 + 82 + if (!nr_zones) 83 + break; 84 + } 85 + } 86 + 87 + blk_status_t null_zone_report(struct nullb *nullb, 88 + struct nullb_cmd *cmd) 89 + { 90 + struct nullb_device *dev = nullb->dev; 91 + struct request *rq = cmd->rq; 92 + unsigned int zno = null_zone_no(dev, blk_rq_pos(rq)); 93 + unsigned int nr_zones = dev->nr_zones - zno; 94 + unsigned int max_zones = (blk_rq_bytes(rq) / 95 + sizeof(struct blk_zone)) - 1; 96 + 97 + nr_zones = min_t(unsigned int, nr_zones, max_zones); 98 + 99 + null_zone_fill_rq(nullb->dev, rq, zno, nr_zones); 100 + 101 + return BLK_STS_OK; 102 + } 103 + 104 + void null_zone_write(struct nullb_cmd *cmd) 105 + { 106 + struct nullb_device *dev = cmd->nq->dev; 107 + struct request *rq = cmd->rq; 108 + sector_t sector = blk_rq_pos(rq); 109 + unsigned int rq_sectors = blk_rq_sectors(rq); 110 + unsigned int zno = null_zone_no(dev, sector); 111 + struct blk_zone *zone = &dev->zones[zno]; 112 + 113 + switch (zone->cond) { 114 + case BLK_ZONE_COND_FULL: 115 + /* Cannot write to a full zone */ 116 + cmd->error = BLK_STS_IOERR; 117 + break; 118 + case BLK_ZONE_COND_EMPTY: 119 + case BLK_ZONE_COND_IMP_OPEN: 120 + /* Writes must be at the write pointer position */ 121 + if (blk_rq_pos(rq) != zone->wp) { 122 + cmd->error = BLK_STS_IOERR; 123 + break; 124 + } 125 + 126 + if (zone->cond == BLK_ZONE_COND_EMPTY) 127 + zone->cond = BLK_ZONE_COND_IMP_OPEN; 128 + 129 + zone->wp += rq_sectors; 130 + if (zone->wp == zone->start + zone->len) 131 + zone->cond = BLK_ZONE_COND_FULL; 132 + break; 133 + default: 134 + /* Invalid zone condition */ 135 + cmd->error = BLK_STS_IOERR; 136 + break; 137 + } 138 + } 139 + 140 + void null_zone_reset(struct nullb_cmd *cmd) 141 + { 142 + struct nullb_device *dev = cmd->nq->dev; 143 + struct request *rq = cmd->rq; 144 + unsigned int zno = null_zone_no(dev, blk_rq_pos(rq)); 145 + struct blk_zone *zone = &dev->zones[zno]; 146 + 147 + zone->cond = BLK_ZONE_COND_EMPTY; 148 + zone->wp = zone->start; 149 + }