Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: make /sys/block/<dev>/queue/discard_max_bytes writeable

Lots of devices support huge discard sizes these days. Depending
on how the device handles them internally, huge discards can
introduce massive latencies (hundreds of msec) on the device side.

We have a sysfs file, discard_max_bytes, that advertises the max
hardware supported discard size. Make this writeable, and split
the setting into a soft and a hard limit. The soft limit can be set
anywhere from 'discard_granularity' up to the hardware limit.

Add a new sysfs file, 'discard_max_hw_bytes', that shows the
hardware-set limit.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>

+53 -2
+9 -1
Documentation/block/queue-sysfs.txt
··· 20 20 reported by the device. A value of '0' means device does not support 21 21 the discard functionality. 22 22 23 - discard_max_bytes (RO) 23 + discard_max_hw_bytes (RO) 24 24 ---------------------- 25 25 Devices that support discard functionality may have internal limits on 26 26 the number of bytes that can be trimmed or unmapped in a single operation. ··· 28 28 number of bytes that can be discarded in a single operation. Discard 29 29 requests issued to the device must not exceed this limit. A discard_max_bytes 30 30 value of 0 means that the device does not support discard functionality. 31 + 32 + discard_max_bytes (RW) 33 + ---------------------- 34 + While discard_max_hw_bytes is the hardware limit for the device, this 35 + setting is the software limit. Some devices exhibit large latencies when 36 + large discards are issued, setting this value lower will make Linux issue 37 + smaller discards and potentially help reduce latencies induced by large 38 + discard operations. 31 39 32 40 discard_zeroes_data (RO) 33 41 ------------------------
+4
block/blk-settings.c
··· 116 116 lim->chunk_sectors = 0; 117 117 lim->max_write_same_sectors = 0; 118 118 lim->max_discard_sectors = 0; 119 + lim->max_hw_discard_sectors = 0; 119 120 lim->discard_granularity = 0; 120 121 lim->discard_alignment = 0; 121 122 lim->discard_misaligned = 0; ··· 304 303 void blk_queue_max_discard_sectors(struct request_queue *q, 305 304 unsigned int max_discard_sectors) 306 305 { 306 + q->limits.max_hw_discard_sectors = max_discard_sectors; 307 307 q->limits.max_discard_sectors = max_discard_sectors; 308 308 } 309 309 EXPORT_SYMBOL(blk_queue_max_discard_sectors); ··· 643 641 644 642 t->max_discard_sectors = min_not_zero(t->max_discard_sectors, 645 643 b->max_discard_sectors); 644 + t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors, 645 + b->max_hw_discard_sectors); 646 646 t->discard_granularity = max(t->discard_granularity, 647 647 b->discard_granularity); 648 648 t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
+39 -1
block/blk-sysfs.c
··· 145 145 return queue_var_show(q->limits.discard_granularity, page); 146 146 } 147 147 148 + static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page) 149 + { 150 + unsigned long long val; 151 + 152 + val = q->limits.max_hw_discard_sectors << 9; 153 + return sprintf(page, "%llu\n", val); 154 + } 155 + 148 156 static ssize_t queue_discard_max_show(struct request_queue *q, char *page) 149 157 { 150 158 return sprintf(page, "%llu\n", 151 159 (unsigned long long)q->limits.max_discard_sectors << 9); 160 + } 161 + 162 + static ssize_t queue_discard_max_store(struct request_queue *q, 163 + const char *page, size_t count) 164 + { 165 + unsigned long max_discard; 166 + ssize_t ret = queue_var_store(&max_discard, page, count); 167 + 168 + if (ret < 0) 169 + return ret; 170 + 171 + if (max_discard & (q->limits.discard_granularity - 1)) 172 + return -EINVAL; 173 + 174 + max_discard >>= 9; 175 + if (max_discard > UINT_MAX) 176 + return -EINVAL; 177 + 178 + if (max_discard > q->limits.max_hw_discard_sectors) 179 + max_discard = q->limits.max_hw_discard_sectors; 180 + 181 + q->limits.max_discard_sectors = max_discard; 182 + return ret; 152 183 } 153 184 154 185 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) ··· 391 360 .show = queue_discard_granularity_show, 392 361 }; 393 362 363 + static struct queue_sysfs_entry queue_discard_max_hw_entry = { 364 + .attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO }, 365 + .show = queue_discard_max_hw_show, 366 + }; 367 + 394 368 static struct queue_sysfs_entry queue_discard_max_entry = { 395 - .attr = {.name = "discard_max_bytes", .mode = S_IRUGO }, 369 + .attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR }, 396 370 .show = queue_discard_max_show, 371 + .store = queue_discard_max_store, 397 372 }; 398 373 399 374 static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { ··· 458 421 &queue_io_opt_entry.attr, 459 422 &queue_discard_granularity_entry.attr, 
460 423 &queue_discard_max_entry.attr, 424 + &queue_discard_max_hw_entry.attr, 461 425 &queue_discard_zeroes_data_entry.attr, 462 426 &queue_write_same_max_entry.attr, 463 427 &queue_nonrot_entry.attr,
+1
include/linux/blkdev.h
··· 268 268 unsigned int io_min; 269 269 unsigned int io_opt; 270 270 unsigned int max_discard_sectors; 271 + unsigned int max_hw_discard_sectors; 271 272 unsigned int max_write_same_sectors; 272 273 unsigned int discard_granularity; 273 274 unsigned int discard_alignment;