Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: make bio auto-integrity deadlock safe

The current block layer automatic integrity protection allocates the
actual integrity buffer, which has three problems:

- because it happens at the bottom of the I/O stack and doesn't use a
mempool it can deadlock under load
- because the data size in a bio is almost unbounded when using large
folios it can relatively easily exceed the maximum kmalloc size
- even when it does not exceed the maximum kmalloc size, it could
exceed the maximum segment size of the device

Fix this by limiting the I/O size so that the integrity buffer never needs
to exceed 2MiB, i.e. 128MiB of data for 8 byte PI and 512 byte integrity
intervals, and create a mempool as a last resort for this maximum size,
mirroring the scheme used for bvecs. As a nice upside none of this
can fail now, so we remove the error handling and open code the
trivial addition of the bip vec.

The new allocation helpers sit outside of bio-integrity-auto.c because
I plan to reuse them for file system based PI in the near future.

Fixes: 7ba1ba12eeef ("block: Block layer data integrity support")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Christoph Hellwig and committed by
Jens Axboe
ec7f31b2 eef09f74

+83 -19
+3 -19
block/bio-integrity-auto.c
··· 29 29 { 30 30 bid->bio->bi_integrity = NULL; 31 31 bid->bio->bi_opf &= ~REQ_INTEGRITY; 32 - kfree(bvec_virt(bid->bip.bip_vec)); 32 + bio_integrity_free_buf(&bid->bip); 33 33 mempool_free(bid, &bid_pool); 34 34 } 35 35 ··· 110 110 struct bio_integrity_data *bid; 111 111 bool set_flags = true; 112 112 gfp_t gfp = GFP_NOIO; 113 - unsigned int len; 114 - void *buf; 115 113 116 114 if (!bi) 117 115 return true; ··· 150 152 if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) 151 153 return true; 152 154 153 - /* Allocate kernel buffer for protection data */ 154 - len = bio_integrity_bytes(bi, bio_sectors(bio)); 155 - buf = kmalloc(len, gfp); 156 - if (!buf) 157 - goto err_end_io; 158 155 bid = mempool_alloc(&bid_pool, GFP_NOIO); 159 156 bio_integrity_init(bio, &bid->bip, &bid->bvec, 1); 160 - 161 157 bid->bio = bio; 162 - 163 158 bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY; 159 + bio_integrity_alloc_buf(bio, gfp & __GFP_ZERO); 160 + 164 161 bip_set_seed(&bid->bip, bio->bi_iter.bi_sector); 165 162 166 163 if (set_flags) { ··· 167 174 bid->bip.bip_flags |= BIP_CHECK_REFTAG; 168 175 } 169 176 170 - if (bio_integrity_add_page(bio, virt_to_page(buf), len, 171 - offset_in_page(buf)) < len) 172 - goto err_end_io; 173 - 174 177 /* Auto-generate integrity metadata if this is a write */ 175 178 if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip)) 176 179 blk_integrity_generate(bio); 177 180 else 178 181 bid->saved_bio_iter = bio->bi_iter; 179 182 return true; 180 - 181 - err_end_io: 182 - bio->bi_status = BLK_STS_RESOURCE; 183 - bio_endio(bio); 184 - return false; 185 183 } 186 184 EXPORT_SYMBOL(bio_integrity_prep); 187 185
+48
block/bio-integrity.c
··· 14 14 struct bio_vec bvecs[]; 15 15 }; 16 16 17 + static mempool_t integrity_buf_pool; 18 + 19 + void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer) 20 + { 21 + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 22 + struct bio_integrity_payload *bip = bio_integrity(bio); 23 + unsigned int len = bio_integrity_bytes(bi, bio_sectors(bio)); 24 + gfp_t gfp = GFP_NOIO | (zero_buffer ? __GFP_ZERO : 0); 25 + void *buf; 26 + 27 + buf = kmalloc(len, (gfp & ~__GFP_DIRECT_RECLAIM) | 28 + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN); 29 + if (unlikely(!buf)) { 30 + struct page *page; 31 + 32 + page = mempool_alloc(&integrity_buf_pool, GFP_NOFS); 33 + if (zero_buffer) 34 + memset(page_address(page), 0, len); 35 + bvec_set_page(&bip->bip_vec[0], page, len, 0); 36 + bip->bip_flags |= BIP_MEMPOOL; 37 + } else { 38 + bvec_set_page(&bip->bip_vec[0], virt_to_page(buf), len, 39 + offset_in_page(buf)); 40 + } 41 + 42 + bip->bip_vcnt = 1; 43 + bip->bip_iter.bi_size = len; 44 + } 45 + 46 + void bio_integrity_free_buf(struct bio_integrity_payload *bip) 47 + { 48 + struct bio_vec *bv = &bip->bip_vec[0]; 49 + 50 + if (bip->bip_flags & BIP_MEMPOOL) 51 + mempool_free(bv->bv_page, &integrity_buf_pool); 52 + else 53 + kfree(bvec_virt(bv)); 54 + } 55 + 17 56 /** 18 57 * bio_integrity_free - Free bio integrity payload 19 58 * @bio: bio containing bip to be freed ··· 477 438 478 439 return 0; 479 440 } 441 + 442 + static int __init bio_integrity_initfn(void) 443 + { 444 + if (mempool_init_page_pool(&integrity_buf_pool, BIO_POOL_SIZE, 445 + get_order(BLK_INTEGRITY_MAX_SIZE))) 446 + panic("bio: can't create integrity buf pool\n"); 447 + return 0; 448 + } 449 + subsys_initcall(bio_integrity_initfn);
+21
block/blk-settings.c
··· 123 123 return 0; 124 124 } 125 125 126 + /* 127 + * Maximum size of I/O that needs a block layer integrity buffer. Limited 128 + * by the number of intervals for which we can fit the integrity buffer into 129 + * the buffer size. Because the buffer is a single segment it is also limited 130 + * by the maximum segment size. 131 + */ 132 + static inline unsigned int max_integrity_io_size(struct queue_limits *lim) 133 + { 134 + return min_t(unsigned int, lim->max_segment_size, 135 + (BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) << 136 + lim->integrity.interval_exp); 137 + } 138 + 126 139 static int blk_validate_integrity_limits(struct queue_limits *lim) 127 140 { 128 141 struct blk_integrity *bi = &lim->integrity; ··· 196 183 197 184 if (!bi->interval_exp) 198 185 bi->interval_exp = ilog2(lim->logical_block_size); 186 + 187 + /* 188 + * The block layer automatically adds integrity data for bios that don't 189 + * already have it. Limit the I/O size so that a single maximum size 190 + * metadata segment can cover the integrity data for the entire I/O. 191 + */ 192 + lim->max_sectors = min(lim->max_sectors, 193 + max_integrity_io_size(lim) >> SECTOR_SHIFT); 199 194 200 195 return 0; 201 196 }
+6
include/linux/bio-integrity.h
··· 14 14 BIP_CHECK_REFTAG = 1 << 6, /* reftag check */ 15 15 BIP_CHECK_APPTAG = 1 << 7, /* apptag check */ 16 16 BIP_P2P_DMA = 1 << 8, /* using P2P address */ 17 + 18 + BIP_MEMPOOL = 1 << 15, /* buffer backed by mempool */ 17 19 }; 18 20 19 21 struct bio_integrity_payload { ··· 142 140 return 0; 143 141 } 144 142 #endif /* CONFIG_BLK_DEV_INTEGRITY */ 143 + 144 + void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer); 145 + void bio_integrity_free_buf(struct bio_integrity_payload *bip); 146 + 145 147 #endif /* _LINUX_BIO_INTEGRITY_H */
+5
include/linux/blk-integrity.h
··· 8 8 9 9 struct request; 10 10 11 + /* 12 + * Maximum contiguous integrity buffer allocation. 13 + */ 14 + #define BLK_INTEGRITY_MAX_SIZE SZ_2M 15 + 11 16 enum blk_integrity_flags { 12 17 BLK_INTEGRITY_NOVERIFY = 1 << 0, 13 18 BLK_INTEGRITY_NOGENERATE = 1 << 1,