Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: remove the discard_zeroes_data flag

Now that we use the proper REQ_OP_WRITE_ZEROES operation everywhere, we can
kill this hack.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>

Authored by Christoph Hellwig and committed by Jens Axboe
48920ff2 45c21793

+27 -124
+2 -8
Documentation/ABI/testing/sysfs-block
··· 213 213 Date: May 2011 214 214 Contact: Martin K. Petersen <martin.petersen@oracle.com> 215 215 Description: 216 - Devices that support discard functionality may return 217 - stale or random data when a previously discarded block 218 - is read back. This can cause problems if the filesystem 219 - expects discarded blocks to be explicitly cleared. If a 220 - device reports that it deterministically returns zeroes 221 - when a discarded area is read the discard_zeroes_data 222 - parameter will be set to one. Otherwise it will be 0 and 223 - the result of reading a discarded area is undefined. 216 + Will always return 0. Don't rely on any specific behavior 217 + for discards, and don't read this file. 224 218 225 219 What: /sys/block/<disk>/queue/write_same_max_bytes 226 220 Date: January 2012
-5
Documentation/block/queue-sysfs.txt
··· 43 43 smaller discards and potentially help reduce latencies induced by large 44 44 discard operations. 45 45 46 - discard_zeroes_data (RO) 47 - ------------------------ 48 - When read, this file will show if the discarded block are zeroed by the 49 - device or not. If its value is '1' the blocks are zeroed otherwise not. 50 - 51 46 hw_sector_size (RO) 52 47 ------------------- 53 48 This is the hardware sector size of the device, in bytes.
+1 -6
block/blk-lib.c
··· 37 37 return -ENXIO; 38 38 39 39 if (flags & BLKDEV_DISCARD_SECURE) { 40 - if (flags & BLKDEV_DISCARD_ZERO) 41 - return -EOPNOTSUPP; 42 40 if (!blk_queue_secure_erase(q)) 43 41 return -EOPNOTSUPP; 44 42 op = REQ_OP_SECURE_ERASE; 45 43 } else { 46 44 if (!blk_queue_discard(q)) 47 - return -EOPNOTSUPP; 48 - if ((flags & BLKDEV_DISCARD_ZERO) && 49 - !q->limits.discard_zeroes_data) 50 45 return -EOPNOTSUPP; 51 46 op = REQ_OP_DISCARD; 52 47 } ··· 121 126 &bio); 122 127 if (!ret && bio) { 123 128 ret = submit_bio_wait(bio); 124 - if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO)) 129 + if (ret == -EOPNOTSUPP) 125 130 ret = 0; 126 131 bio_put(bio); 127 132 }
-3
block/blk-settings.c
··· 103 103 lim->discard_granularity = 0; 104 104 lim->discard_alignment = 0; 105 105 lim->discard_misaligned = 0; 106 - lim->discard_zeroes_data = 0; 107 106 lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; 108 107 lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); 109 108 lim->alignment_offset = 0; ··· 126 127 blk_set_default_limits(lim); 127 128 128 129 /* Inherit limits from component devices */ 129 - lim->discard_zeroes_data = 1; 130 130 lim->max_segments = USHRT_MAX; 131 131 lim->max_discard_segments = 1; 132 132 lim->max_hw_sectors = UINT_MAX; ··· 607 609 t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); 608 610 609 611 t->cluster &= b->cluster; 610 - t->discard_zeroes_data &= b->discard_zeroes_data; 611 612 612 613 /* Physical block size a multiple of the logical block size? */ 613 614 if (t->physical_block_size & (t->logical_block_size - 1)) {
+1 -1
block/blk-sysfs.c
··· 208 208 209 209 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) 210 210 { 211 - return queue_var_show(queue_discard_zeroes_data(q), page); 211 + return queue_var_show(0, page); 212 212 } 213 213 214 214 static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
+1 -1
block/compat_ioctl.c
··· 685 685 case BLKALIGNOFF: 686 686 return compat_put_int(arg, bdev_alignment_offset(bdev)); 687 687 case BLKDISCARDZEROES: 688 - return compat_put_uint(arg, bdev_discard_zeroes_data(bdev)); 688 + return compat_put_uint(arg, 0); 689 689 case BLKFLSBUF: 690 690 case BLKROSET: 691 691 case BLKDISCARD:
+1 -1
block/ioctl.c
··· 547 547 case BLKALIGNOFF: 548 548 return put_int(arg, bdev_alignment_offset(bdev)); 549 549 case BLKDISCARDZEROES: 550 - return put_uint(arg, bdev_discard_zeroes_data(bdev)); 550 + return put_uint(arg, 0); 551 551 case BLKSECTGET: 552 552 max_sectors = min_t(unsigned int, USHRT_MAX, 553 553 queue_max_sectors(bdev_get_queue(bdev)));
-2
drivers/block/drbd/drbd_main.c
··· 931 931 p->qlim->io_min = cpu_to_be32(queue_io_min(q)); 932 932 p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); 933 933 p->qlim->discard_enabled = blk_queue_discard(q); 934 - p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q); 935 934 p->qlim->write_same_capable = !!q->limits.max_write_same_sectors; 936 935 } else { 937 936 q = device->rq_queue; ··· 940 941 p->qlim->io_min = cpu_to_be32(queue_io_min(q)); 941 942 p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); 942 943 p->qlim->discard_enabled = 0; 943 - p->qlim->discard_zeroes_data = 0; 944 944 p->qlim->write_same_capable = 0; 945 945 } 946 946 }
+1 -6
drivers/block/drbd/drbd_nl.c
··· 1199 1199 struct drbd_connection *connection = first_peer_device(device)->connection; 1200 1200 bool can_do = b ? blk_queue_discard(b) : true; 1201 1201 1202 - if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) { 1203 - can_do = false; 1204 - drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n"); 1205 - } 1206 1202 if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { 1207 1203 can_do = false; 1208 1204 drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); ··· 1480 1484 if (disk_conf->al_extents > drbd_al_extents_max(nbc)) 1481 1485 disk_conf->al_extents = drbd_al_extents_max(nbc); 1482 1486 1483 - if (!blk_queue_discard(q) 1484 - || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) { 1487 + if (!blk_queue_discard(q)) { 1485 1488 if (disk_conf->rs_discard_granularity) { 1486 1489 disk_conf->rs_discard_granularity = 0; /* disable feature */ 1487 1490 drbd_info(device, "rs_discard_granularity feature disabled\n");
-2
drivers/block/loop.c
··· 828 828 q->limits.discard_alignment = 0; 829 829 blk_queue_max_discard_sectors(q, 0); 830 830 blk_queue_max_write_zeroes_sectors(q, 0); 831 - q->limits.discard_zeroes_data = 0; 832 831 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 833 832 return; 834 833 } ··· 836 837 q->limits.discard_alignment = 0; 837 838 blk_queue_max_discard_sectors(q, UINT_MAX >> 9); 838 839 blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); 839 - q->limits.discard_zeroes_data = 1; 840 840 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 841 841 } 842 842
-1
drivers/block/mtip32xx/mtip32xx.c
··· 4025 4025 dd->queue->limits.discard_granularity = 4096; 4026 4026 blk_queue_max_discard_sectors(dd->queue, 4027 4027 MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); 4028 - dd->queue->limits.discard_zeroes_data = 0; 4029 4028 } 4030 4029 4031 4030 /* Set the capacity of the device in 512 byte sectors. */
-1
drivers/block/nbd.c
··· 1110 1110 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); 1111 1111 disk->queue->limits.discard_granularity = 512; 1112 1112 blk_queue_max_discard_sectors(disk->queue, UINT_MAX); 1113 - disk->queue->limits.discard_zeroes_data = 0; 1114 1113 blk_queue_max_hw_sectors(disk->queue, 65536); 1115 1114 disk->queue->limits.max_sectors = 256; 1116 1115
-1
drivers/md/dm-cache-target.c
··· 2773 2773 2774 2774 ti->num_discard_bios = 1; 2775 2775 ti->discards_supported = true; 2776 - ti->discard_zeroes_data_unsupported = true; 2777 2776 ti->split_discard_bios = false; 2778 2777 2779 2778 cache->features = ca->features;
-1
drivers/md/dm-crypt.c
··· 2030 2030 wake_up_process(cc->write_thread); 2031 2031 2032 2032 ti->num_flush_bios = 1; 2033 - ti->discard_zeroes_data_unsupported = true; 2034 2033 2035 2034 return 0; 2036 2035
+3 -3
drivers/md/dm-raid.c
··· 2813 2813 /* Assume discards not supported until after checks below. */ 2814 2814 ti->discards_supported = false; 2815 2815 2816 - /* RAID level 4,5,6 require discard_zeroes_data for data integrity! */ 2816 + /* 2817 + * XXX: RAID level 4,5,6 require zeroing for safety. 2818 + */ 2817 2819 raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6); 2818 2820 2819 2821 for (i = 0; i < rs->raid_disks; i++) { ··· 2829 2827 return; 2830 2828 2831 2829 if (raid456) { 2832 - if (!q->limits.discard_zeroes_data) 2833 - return; 2834 2830 if (!devices_handle_discard_safely) { 2835 2831 DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty."); 2836 2832 DMERR("Set dm-raid.devices_handle_discard_safely=Y to override.");
-1
drivers/md/dm-raid1.c
··· 1124 1124 ti->num_flush_bios = 1; 1125 1125 ti->num_discard_bios = 1; 1126 1126 ti->per_io_data_size = sizeof(struct dm_raid1_bio_record); 1127 - ti->discard_zeroes_data_unsupported = true; 1128 1127 1129 1128 ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0); 1130 1129 if (!ms->kmirrord_wq) {
-19
drivers/md/dm-table.c
··· 1449 1449 return false; 1450 1450 } 1451 1451 1452 - static bool dm_table_discard_zeroes_data(struct dm_table *t) 1453 - { 1454 - struct dm_target *ti; 1455 - unsigned i = 0; 1456 - 1457 - /* Ensure that all targets supports discard_zeroes_data. */ 1458 - while (i < dm_table_get_num_targets(t)) { 1459 - ti = dm_table_get_target(t, i++); 1460 - 1461 - if (ti->discard_zeroes_data_unsupported) 1462 - return false; 1463 - } 1464 - 1465 - return true; 1466 - } 1467 - 1468 1452 static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, 1469 1453 sector_t start, sector_t len, void *data) 1470 1454 { ··· 1603 1619 fua = true; 1604 1620 } 1605 1621 blk_queue_write_cache(q, wc, fua); 1606 - 1607 - if (!dm_table_discard_zeroes_data(t)) 1608 - q->limits.discard_zeroes_data = 0; 1609 1622 1610 1623 /* Ensure that all underlying devices are non-rotational. */ 1611 1624 if (dm_table_all_devices_attribute(t, device_is_nonrot))
-2
drivers/md/dm-thin.c
··· 3263 3263 * them down to the data device. The thin device's discard 3264 3264 * processing will cause mappings to be removed from the btree. 3265 3265 */ 3266 - ti->discard_zeroes_data_unsupported = true; 3267 3266 if (pf.discard_enabled && pf.discard_passdown) { 3268 3267 ti->num_discard_bios = 1; 3269 3268 ··· 4118 4119 ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); 4119 4120 4120 4121 /* In case the pool supports discards, pass them on. */ 4121 - ti->discard_zeroes_data_unsupported = true; 4122 4122 if (tc->pool->pf.discard_enabled) { 4123 4123 ti->discards_supported = true; 4124 4124 ti->num_discard_bios = 1;
+16 -34
drivers/md/raid5.c
··· 7227 7227 7228 7228 if (mddev->queue) { 7229 7229 int chunk_size; 7230 - bool discard_supported = true; 7231 7230 /* read-ahead size must cover two whole stripes, which 7232 7231 * is 2 * (datadisks) * chunksize where 'n' is the 7233 7232 * number of raid devices ··· 7262 7263 blk_queue_max_discard_sectors(mddev->queue, 7263 7264 0xfffe * STRIPE_SECTORS); 7264 7265 7265 - /* 7266 - * unaligned part of discard request will be ignored, so can't 7267 - * guarantee discard_zeroes_data 7268 - */ 7269 - mddev->queue->limits.discard_zeroes_data = 0; 7270 - 7271 7266 blk_queue_max_write_same_sectors(mddev->queue, 0); 7272 7267 blk_queue_max_write_zeroes_sectors(mddev->queue, 0); 7273 7268 ··· 7270 7277 rdev->data_offset << 9); 7271 7278 disk_stack_limits(mddev->gendisk, rdev->bdev, 7272 7279 rdev->new_data_offset << 9); 7273 - /* 7274 - * discard_zeroes_data is required, otherwise data 7275 - * could be lost. Consider a scenario: discard a stripe 7276 - * (the stripe could be inconsistent if 7277 - * discard_zeroes_data is 0); write one disk of the 7278 - * stripe (the stripe could be inconsistent again 7279 - * depending on which disks are used to calculate 7280 - * parity); the disk is broken; The stripe data of this 7281 - * disk is lost. 7282 - */ 7283 - if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) || 7284 - !bdev_get_queue(rdev->bdev)-> 7285 - limits.discard_zeroes_data) 7286 - discard_supported = false; 7287 - /* Unfortunately, discard_zeroes_data is not currently 7288 - * a guarantee - just a hint. So we only allow DISCARD 7289 - * if the sysadmin has confirmed that only safe devices 7290 - * are in use by setting a module parameter. 
7291 - */ 7292 - if (!devices_handle_discard_safely) { 7293 - if (discard_supported) { 7294 - pr_info("md/raid456: discard support disabled due to uncertainty.\n"); 7295 - pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n"); 7296 - } 7297 - discard_supported = false; 7298 - } 7299 7280 } 7300 7281 7301 - if (discard_supported && 7282 + /* 7283 + * zeroing is required, otherwise data 7284 + * could be lost. Consider a scenario: discard a stripe 7285 + * (the stripe could be inconsistent if 7286 + * discard_zeroes_data is 0); write one disk of the 7287 + * stripe (the stripe could be inconsistent again 7288 + * depending on which disks are used to calculate 7289 + * parity); the disk is broken; The stripe data of this 7290 + * disk is lost. 7291 + * 7292 + * We only allow DISCARD if the sysadmin has confirmed that 7293 + * only safe devices are in use by setting a module parameter. 7294 + * A better idea might be to turn DISCARD into WRITE_ZEROES 7295 + * requests, as that is required to be safe. 7296 + */ 7297 + if (devices_handle_discard_safely && 7302 7298 mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && 7303 7299 mddev->queue->limits.discard_granularity >= stripe) 7304 7300 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
-5
drivers/scsi/sd.c
··· 644 644 unsigned int logical_block_size = sdkp->device->sector_size; 645 645 unsigned int max_blocks = 0; 646 646 647 - q->limits.discard_zeroes_data = 0; 648 - 649 647 /* 650 648 * When LBPRZ is reported, discard alignment and granularity 651 649 * must be fixed to the logical block size. Otherwise the block ··· 679 681 case SD_LBP_WS16: 680 682 max_blocks = min_not_zero(sdkp->max_ws_blocks, 681 683 (u32)SD_MAX_WS16_BLOCKS); 682 - q->limits.discard_zeroes_data = sdkp->lbprz; 683 684 break; 684 685 685 686 case SD_LBP_WS10: 686 687 max_blocks = min_not_zero(sdkp->max_ws_blocks, 687 688 (u32)SD_MAX_WS10_BLOCKS); 688 - q->limits.discard_zeroes_data = sdkp->lbprz; 689 689 break; 690 690 691 691 case SD_LBP_ZERO: 692 692 max_blocks = min_not_zero(sdkp->max_ws_blocks, 693 693 (u32)SD_MAX_WS10_BLOCKS); 694 - q->limits.discard_zeroes_data = 1; 695 694 break; 696 695 } 697 696
+1 -1
drivers/target/target_core_device.c
··· 851 851 attrib->unmap_granularity = q->limits.discard_granularity / block_size; 852 852 attrib->unmap_granularity_alignment = q->limits.discard_alignment / 853 853 block_size; 854 - attrib->unmap_zeroes_data = q->limits.discard_zeroes_data; 854 + attrib->unmap_zeroes_data = 0; 855 855 return true; 856 856 } 857 857 EXPORT_SYMBOL(target_configure_unmap_from_queue);
-15
include/linux/blkdev.h
··· 339 339 unsigned char misaligned; 340 340 unsigned char discard_misaligned; 341 341 unsigned char cluster; 342 - unsigned char discard_zeroes_data; 343 342 unsigned char raid_partial_stripes_expensive; 344 343 enum blk_zoned_model zoned; 345 344 }; ··· 1340 1341 sector_t nr_sects, gfp_t gfp_mask, struct page *page); 1341 1342 1342 1343 #define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ 1343 - #define BLKDEV_DISCARD_ZERO (1 << 1) /* must reliably zero data */ 1344 1344 1345 1345 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 1346 1346 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); ··· 1537 1539 return bdev->bd_part->discard_alignment; 1538 1540 1539 1541 return q->limits.discard_alignment; 1540 - } 1541 - 1542 - static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) 1543 - { 1544 - if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) 1545 - return 1; 1546 - 1547 - return 0; 1548 - } 1549 - 1550 - static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) 1551 - { 1552 - return queue_discard_zeroes_data(bdev_get_queue(bdev)); 1553 1542 } 1554 1543 1555 1544 static inline unsigned int bdev_write_same(struct block_device *bdev)
-5
include/linux/device-mapper.h
··· 296 296 * on max_io_len boundary. 297 297 */ 298 298 bool split_discard_bios:1; 299 - 300 - /* 301 - * Set if this target does not return zeroes on discarded blocks. 302 - */ 303 - bool discard_zeroes_data_unsupported:1; 304 299 }; 305 300 306 301 /* Each target can link one of these into the table */