Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

zloop: add max_open_zones option

Introduce the new max_open_zones option to allow specifying a limit on
the maximum number of open zones of a zloop device. This change allows
creating a zloop device that can more closely mimic the characteristics
of a physical SMR drive.

When set to a non-zero value, only up to max_open_zones zones can be in
the implicit open (BLK_ZONE_COND_IMP_OPEN) and explicit open
(BLK_ZONE_COND_EXP_OPEN) conditions at any time. The transition to the
implicit open condition of a zone on a write operation can result in an
implicit close of an already implicitly open zone. This is handled in
the function zloop_do_open_zone(). This function also handles
transitions to the explicit open condition. Implicit close transitions
are handled using an LRU ordered list of open zones which is managed
using the helper functions zloop_lru_rotate_open_zone() and
zloop_lru_remove_open_zone().

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260326203245.946830-1-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Damien Le Moal and committed by
Jens Axboe
b2a78fec 2a2f520f

+168 -17
+4 -1
Documentation/admin-guide/blockdev/zoned_loop.rst
··· 62 62 /dev/zloop-control device:: 63 63 64 64 $ cat /dev/zloop-control 65 - add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io 65 + add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,max_open_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io,zone_append=%u,ordered_zone_append,discard_write_cache 66 66 remove id=%d 67 67 68 68 In more details, the options that can be used with the "add" command are as ··· 80 80 conv_zones Total number of conventioanl zones starting from 81 81 sector 0 82 82 Default: 8 83 + max_open_zones Maximum number of open sequential write required zones 84 + (0 for no limit). 85 + Default: 0 83 86 base_dir Path to the base directory where to create the directory 84 87 containing the zone files of the device. 85 88 Default=/var/local/zloop.
+164 -16
drivers/block/zloop.c
··· 36 36 ZLOOP_OPT_ZONE_APPEND = (1 << 9), 37 37 ZLOOP_OPT_ORDERED_ZONE_APPEND = (1 << 10), 38 38 ZLOOP_OPT_DISCARD_WRITE_CACHE = (1 << 11), 39 + ZLOOP_OPT_MAX_OPEN_ZONES = (1 << 12), 39 40 }; 40 41 41 42 static const match_table_t zloop_opt_tokens = { ··· 52 51 { ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" }, 53 52 { ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" }, 54 53 { ZLOOP_OPT_DISCARD_WRITE_CACHE, "discard_write_cache" }, 54 + { ZLOOP_OPT_MAX_OPEN_ZONES, "max_open_zones=%u" }, 55 55 { ZLOOP_OPT_ERR, NULL } 56 56 }; 57 57 ··· 61 59 #define ZLOOP_DEF_ZONE_SIZE ((256ULL * SZ_1M) >> SECTOR_SHIFT) 62 60 #define ZLOOP_DEF_NR_ZONES 64 63 61 #define ZLOOP_DEF_NR_CONV_ZONES 8 62 + #define ZLOOP_DEF_MAX_OPEN_ZONES 0 64 63 #define ZLOOP_DEF_BASE_DIR "/var/local/zloop" 65 64 #define ZLOOP_DEF_NR_QUEUES 1 66 65 #define ZLOOP_DEF_QUEUE_DEPTH 128 ··· 79 76 sector_t zone_size; 80 77 sector_t zone_capacity; 81 78 unsigned int nr_conv_zones; 79 + unsigned int max_open_zones; 82 80 char *base_dir; 83 81 unsigned int nr_queues; 84 82 unsigned int queue_depth; ··· 103 99 ZLOOP_ZONE_SEQ_ERROR, 104 100 }; 105 101 102 + /* 103 + * Zone descriptor. 104 + * Locking order: z.lock -> z.wp_lock -> zlo.open_zones_lock 105 + */ 106 106 struct zloop_zone { 107 + struct list_head open_zone_entry; 107 108 struct file *file; 108 109 109 110 unsigned long flags; ··· 142 133 sector_t zone_capacity; 143 134 unsigned int nr_zones; 144 135 unsigned int nr_conv_zones; 136 + unsigned int max_open_zones; 145 137 unsigned int block_size; 138 + 139 + spinlock_t open_zones_lock; 140 + struct list_head open_zones_lru_list; 141 + unsigned int nr_open_zones; 146 142 147 143 struct zloop_zone zones[] __counted_by(nr_zones); 148 144 }; ··· 170 156 struct zloop_device *zlo = rq->q->queuedata; 171 157 172 158 return blk_rq_pos(rq) >> zlo->zone_shift; 159 + } 160 + 161 + /* 162 + * Open an already open zone. 
This is mostly a no-op, except for the imp open -> 163 + * exp open condition change that may happen. We also move a zone at the tail of 164 + * the list of open zones so that if we need to 165 + * implicitly close one open zone, we can do so in LRU order. 166 + */ 167 + static inline void zloop_lru_rotate_open_zone(struct zloop_device *zlo, 168 + struct zloop_zone *zone) 169 + { 170 + if (zlo->max_open_zones) { 171 + spin_lock(&zlo->open_zones_lock); 172 + list_move_tail(&zone->open_zone_entry, 173 + &zlo->open_zones_lru_list); 174 + spin_unlock(&zlo->open_zones_lock); 175 + } 176 + } 177 + 178 + static inline void zloop_lru_remove_open_zone(struct zloop_device *zlo, 179 + struct zloop_zone *zone) 180 + { 181 + if (zone->cond == BLK_ZONE_COND_IMP_OPEN || 182 + zone->cond == BLK_ZONE_COND_EXP_OPEN) { 183 + spin_lock(&zlo->open_zones_lock); 184 + list_del_init(&zone->open_zone_entry); 185 + zlo->nr_open_zones--; 186 + spin_unlock(&zlo->open_zones_lock); 187 + } 188 + } 189 + 190 + static inline bool zloop_can_open_zone(struct zloop_device *zlo) 191 + { 192 + return !zlo->max_open_zones || zlo->nr_open_zones < zlo->max_open_zones; 193 + } 194 + 195 + /* 196 + * If we have reached the maximum open zones limit, attempt to close an 197 + * implicitly open zone (if we have any) so that we can implicitly open another 198 + * zone without exceeding the maximum number of open zones. 
199 + */ 200 + static bool zloop_close_imp_open_zone(struct zloop_device *zlo) 201 + { 202 + struct zloop_zone *zone; 203 + 204 + lockdep_assert_held(&zlo->open_zones_lock); 205 + 206 + if (zloop_can_open_zone(zlo)) 207 + return true; 208 + 209 + list_for_each_entry(zone, &zlo->open_zones_lru_list, open_zone_entry) { 210 + if (zone->cond == BLK_ZONE_COND_IMP_OPEN) { 211 + zone->cond = BLK_ZONE_COND_CLOSED; 212 + list_del_init(&zone->open_zone_entry); 213 + zlo->nr_open_zones--; 214 + return true; 215 + } 216 + } 217 + 218 + return false; 219 + } 220 + 221 + static bool zloop_open_closed_or_empty_zone(struct zloop_device *zlo, 222 + struct zloop_zone *zone, 223 + bool explicit) 224 + { 225 + spin_lock(&zlo->open_zones_lock); 226 + 227 + if (explicit) { 228 + /* 229 + * Explicit open: we cannot allow this if we have reached the 230 + * maximum open zones limit. 231 + */ 232 + if (!zloop_can_open_zone(zlo)) 233 + goto fail; 234 + zone->cond = BLK_ZONE_COND_EXP_OPEN; 235 + } else { 236 + /* 237 + * Implicit open case: if we have reached the maximum open zones 238 + * limit, try to close an implicitly open zone first. 
239 + */ 240 + if (!zloop_close_imp_open_zone(zlo)) 241 + goto fail; 242 + zone->cond = BLK_ZONE_COND_IMP_OPEN; 243 + } 244 + 245 + zlo->nr_open_zones++; 246 + list_add_tail(&zone->open_zone_entry, 247 + &zlo->open_zones_lru_list); 248 + 249 + spin_unlock(&zlo->open_zones_lock); 250 + 251 + return true; 252 + 253 + fail: 254 + spin_unlock(&zlo->open_zones_lock); 255 + 256 + return false; 257 + } 258 + 259 + static bool zloop_do_open_zone(struct zloop_device *zlo, 260 + struct zloop_zone *zone, bool explicit) 261 + { 262 + switch (zone->cond) { 263 + case BLK_ZONE_COND_IMP_OPEN: 264 + case BLK_ZONE_COND_EXP_OPEN: 265 + if (explicit) 266 + zone->cond = BLK_ZONE_COND_EXP_OPEN; 267 + zloop_lru_rotate_open_zone(zlo, zone); 268 + return true; 269 + case BLK_ZONE_COND_EMPTY: 270 + case BLK_ZONE_COND_CLOSED: 271 + return zloop_open_closed_or_empty_zone(zlo, zone, explicit); 272 + default: 273 + return false; 274 + } 173 275 } 174 276 175 277 static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no) ··· 321 191 322 192 spin_lock_irqsave(&zone->wp_lock, flags); 323 193 if (!file_sectors) { 194 + zloop_lru_remove_open_zone(zlo, zone); 324 195 zone->cond = BLK_ZONE_COND_EMPTY; 325 196 zone->wp = zone->start; 326 197 } else if (file_sectors == zlo->zone_capacity) { 198 + zloop_lru_remove_open_zone(zlo, zone); 327 199 zone->cond = BLK_ZONE_COND_FULL; 328 200 zone->wp = ULLONG_MAX; 329 201 } else { 330 - zone->cond = BLK_ZONE_COND_CLOSED; 202 + if (zone->cond != BLK_ZONE_COND_IMP_OPEN && 203 + zone->cond != BLK_ZONE_COND_EXP_OPEN) 204 + zone->cond = BLK_ZONE_COND_CLOSED; 331 205 zone->wp = zone->start + file_sectors; 332 206 } 333 207 spin_unlock_irqrestore(&zone->wp_lock, flags); ··· 355 221 goto unlock; 356 222 } 357 223 358 - switch (zone->cond) { 359 - case BLK_ZONE_COND_EXP_OPEN: 360 - break; 361 - case BLK_ZONE_COND_EMPTY: 362 - case BLK_ZONE_COND_CLOSED: 363 - case BLK_ZONE_COND_IMP_OPEN: 364 - zone->cond = BLK_ZONE_COND_EXP_OPEN; 365 - break; 366 - 
case BLK_ZONE_COND_FULL: 367 - default: 224 + if (!zloop_do_open_zone(zlo, zone, true)) 368 225 ret = -EIO; 369 - break; 370 - } 371 226 372 227 unlock: 373 228 mutex_unlock(&zone->lock); ··· 387 264 case BLK_ZONE_COND_IMP_OPEN: 388 265 case BLK_ZONE_COND_EXP_OPEN: 389 266 spin_lock_irqsave(&zone->wp_lock, flags); 267 + zloop_lru_remove_open_zone(zlo, zone); 390 268 if (zone->wp == zone->start) 391 269 zone->cond = BLK_ZONE_COND_EMPTY; 392 270 else ··· 429 305 } 430 306 431 307 spin_lock_irqsave(&zone->wp_lock, flags); 308 + zloop_lru_remove_open_zone(zlo, zone); 432 309 zone->cond = BLK_ZONE_COND_EMPTY; 433 310 zone->wp = zone->start; 434 311 clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); ··· 477 352 } 478 353 479 354 spin_lock_irqsave(&zone->wp_lock, flags); 355 + zloop_lru_remove_open_zone(zlo, zone); 480 356 zone->cond = BLK_ZONE_COND_FULL; 481 357 zone->wp = ULLONG_MAX; 482 358 clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); ··· 604 478 } 605 479 606 480 /* Implicitly open the target zone. */ 607 - if (zone->cond == BLK_ZONE_COND_CLOSED || 608 - zone->cond == BLK_ZONE_COND_EMPTY) 609 - zone->cond = BLK_ZONE_COND_IMP_OPEN; 481 + if (!zloop_do_open_zone(zlo, zone, false)) { 482 + ret = -EIO; 483 + goto out_unlock; 484 + } 610 485 611 486 /* 612 487 * Advance the write pointer, unless ordered zone append is in use. 
If ··· 617 490 if (!is_append || !zlo->ordered_zone_append) { 618 491 zone->wp += nr_sectors; 619 492 if (zone->wp == zone_end) { 493 + zloop_lru_remove_open_zone(zlo, zone); 620 494 zone->cond = BLK_ZONE_COND_FULL; 621 495 zone->wp = ULLONG_MAX; 622 496 } ··· 874 746 rq->__sector = zone->wp; 875 747 zone->wp += blk_rq_sectors(rq); 876 748 if (zone->wp >= zone_end) { 749 + zloop_lru_remove_open_zone(zlo, zone); 877 750 zone->cond = BLK_ZONE_COND_FULL; 878 751 zone->wp = ULLONG_MAX; 879 752 } ··· 1072 943 int ret; 1073 944 1074 945 mutex_init(&zone->lock); 946 + INIT_LIST_HEAD(&zone->open_zone_entry); 1075 947 spin_lock_init(&zone->wp_lock); 1076 948 zone->start = (sector_t)zone_no << zlo->zone_shift; 1077 949 ··· 1193 1063 goto out; 1194 1064 } 1195 1065 1066 + if (opts->max_open_zones > nr_zones - opts->nr_conv_zones) { 1067 + pr_err("Invalid maximum number of open zones %u\n", 1068 + opts->max_open_zones); 1069 + goto out; 1070 + } 1071 + 1196 1072 zlo = kvzalloc_flex(*zlo, zones, nr_zones); 1197 1073 if (!zlo) { 1198 1074 ret = -ENOMEM; 1199 1075 goto out; 1200 1076 } 1201 1077 WRITE_ONCE(zlo->state, Zlo_creating); 1078 + spin_lock_init(&zlo->open_zones_lock); 1079 + INIT_LIST_HEAD(&zlo->open_zones_lru_list); 1202 1080 1203 1081 ret = mutex_lock_killable(&zloop_ctl_mutex); 1204 1082 if (ret) ··· 1234 1096 zlo->zone_capacity = zlo->zone_size; 1235 1097 zlo->nr_zones = nr_zones; 1236 1098 zlo->nr_conv_zones = opts->nr_conv_zones; 1099 + zlo->max_open_zones = opts->max_open_zones; 1237 1100 zlo->buffered_io = opts->buffered_io; 1238 1101 zlo->zone_append = opts->zone_append; 1239 1102 if (zlo->zone_append) ··· 1282 1143 lim.logical_block_size = zlo->block_size; 1283 1144 if (zlo->zone_append) 1284 1145 lim.max_hw_zone_append_sectors = lim.max_hw_sectors; 1146 + lim.max_open_zones = zlo->max_open_zones; 1285 1147 1286 1148 zlo->tag_set.ops = &zloop_mq_ops; 1287 1149 zlo->tag_set.nr_hw_queues = opts->nr_queues; ··· 1466 1326 opts->capacity = ZLOOP_DEF_ZONE_SIZE * 
ZLOOP_DEF_NR_ZONES; 1467 1327 opts->zone_size = ZLOOP_DEF_ZONE_SIZE; 1468 1328 opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES; 1329 + opts->max_open_zones = ZLOOP_DEF_MAX_OPEN_ZONES; 1469 1330 opts->nr_queues = ZLOOP_DEF_NR_QUEUES; 1470 1331 opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH; 1471 1332 opts->buffered_io = ZLOOP_DEF_BUFFERED_IO; ··· 1544 1403 goto out; 1545 1404 } 1546 1405 opts->nr_conv_zones = token; 1406 + break; 1407 + case ZLOOP_OPT_MAX_OPEN_ZONES: 1408 + if (match_uint(args, &token)) { 1409 + ret = -EINVAL; 1410 + goto out; 1411 + } 1412 + opts->max_open_zones = token; 1547 1413 break; 1548 1414 case ZLOOP_OPT_BASE_DIR: 1549 1415 p = match_strdup(args);