Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: remove the same_page output argument to bvec_try_merge_page

bvec_try_merge_page currently returns whether the added page fragment lies
within the same page as the last page in the current last bio_vec.

This information is used by __bio_iov_iter_get_pages so that we always
have a single folio pin per page even when the page is split over
multiple __bio_iov_iter_get_pages calls.

Threading this through the entire low-level add-page-to-bio logic is
annoying and inefficient, and leads to less code sharing than otherwise
possible. Instead, add code to __bio_iov_iter_get_pages that checks whether
the bio_vecs did not change — in which case a merge into the last segment
must have happened — and whether there is an offset into the page for the
currently added fragment, because if so we must already have had a previous
fragment of the same page in the last bio_vec. While this is still a bit
ugly, it keeps the logic in the one place that needs it and allows for
more code sharing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250512042354.514329-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Christoph Hellwig and committed by
Jens Axboe
77fd359b d1ba22ab

+25 -37
+1 -3
block/bio-integrity.c
··· 127 127 128 128 if (bip->bip_vcnt > 0) { 129 129 struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1]; 130 - bool same_page = false; 131 130 132 - if (bvec_try_merge_hw_page(q, bv, page, len, offset, 133 - &same_page)) { 131 + if (bvec_try_merge_hw_page(q, bv, page, len, offset)) { 134 132 bip->bip_iter.bi_size += len; 135 133 return len; 136 134 }
+23 -32
block/bio.c
··· 920 920 } 921 921 922 922 static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page, 923 - unsigned int len, unsigned int off, bool *same_page) 923 + unsigned int len, unsigned int off) 924 924 { 925 925 size_t bv_end = bv->bv_offset + bv->bv_len; 926 926 phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1; ··· 933 933 if (!zone_device_pages_have_same_pgmap(bv->bv_page, page)) 934 934 return false; 935 935 936 - *same_page = ((vec_end_addr & PAGE_MASK) == ((page_addr + off) & 937 - PAGE_MASK)); 938 - if (!*same_page) { 936 + if ((vec_end_addr & PAGE_MASK) != ((page_addr + off) & PAGE_MASK)) { 939 937 if (IS_ENABLED(CONFIG_KMSAN)) 940 938 return false; 941 939 if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE) ··· 953 955 * helpers to split. Hopefully this will go away soon. 954 956 */ 955 957 bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, 956 - struct page *page, unsigned len, unsigned offset, 957 - bool *same_page) 958 + struct page *page, unsigned len, unsigned offset) 958 959 { 959 960 unsigned long mask = queue_segment_boundary(q); 960 961 phys_addr_t addr1 = bvec_phys(bv); ··· 963 966 return false; 964 967 if (len > queue_max_segment_size(q) - bv->bv_len) 965 968 return false; 966 - return bvec_try_merge_page(bv, page, len, offset, same_page); 969 + return bvec_try_merge_page(bv, page, len, offset); 967 970 } 968 971 969 972 /** ··· 1017 1020 int bio_add_page(struct bio *bio, struct page *page, 1018 1021 unsigned int len, unsigned int offset) 1019 1022 { 1020 - bool same_page = false; 1021 - 1022 1023 if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) 1023 1024 return 0; 1024 1025 if (bio->bi_iter.bi_size > UINT_MAX - len) ··· 1024 1029 1025 1030 if (bio->bi_vcnt > 0 && 1026 1031 bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1], 1027 - page, len, offset, &same_page)) { 1032 + page, len, offset)) { 1028 1033 bio->bi_iter.bi_size += len; 1029 1034 return len; 1030 1035 } ··· 1156 1161 
bio_set_flag(bio, BIO_CLONED); 1157 1162 } 1158 1163 1159 - static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len, 1160 - size_t offset) 1161 - { 1162 - bool same_page = false; 1163 - 1164 - if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len)) 1165 - return -EIO; 1166 - 1167 - if (bio->bi_vcnt > 0 && 1168 - bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1], 1169 - folio_page(folio, 0), len, offset, 1170 - &same_page)) { 1171 - bio->bi_iter.bi_size += len; 1172 - if (same_page && bio_flagged(bio, BIO_PAGE_PINNED)) 1173 - unpin_user_folio(folio, 1); 1174 - return 0; 1175 - } 1176 - bio_add_folio_nofail(bio, folio, len, offset); 1177 - return 0; 1178 - } 1179 - 1180 1164 static unsigned int get_contig_folio_len(unsigned int *num_pages, 1181 1165 struct page **pages, unsigned int i, 1182 1166 struct folio *folio, size_t left, ··· 1250 1276 for (left = size, i = 0; left > 0; left -= len, i += num_pages) { 1251 1277 struct page *page = pages[i]; 1252 1278 struct folio *folio = page_folio(page); 1279 + unsigned int old_vcnt = bio->bi_vcnt; 1253 1280 1254 1281 folio_offset = ((size_t)folio_page_idx(folio, page) << 1255 1282 PAGE_SHIFT) + offset; ··· 1263 1288 len = get_contig_folio_len(&num_pages, pages, i, 1264 1289 folio, left, offset); 1265 1290 1266 - bio_iov_add_folio(bio, folio, len, folio_offset); 1291 + if (!bio_add_folio(bio, folio, len, folio_offset)) { 1292 + WARN_ON_ONCE(1); 1293 + ret = -EINVAL; 1294 + goto out; 1295 + } 1296 + 1297 + if (bio_flagged(bio, BIO_PAGE_PINNED)) { 1298 + /* 1299 + * We're adding another fragment of a page that already 1300 + * was part of the last segment. Undo our pin as the 1301 + * page was pinned when an earlier fragment of it was 1302 + * added to the bio and __bio_release_pages expects a 1303 + * single pin per page. 1304 + */ 1305 + if (offset && bio->bi_vcnt == old_vcnt) 1306 + unpin_user_folio(folio, 1); 1307 + } 1267 1308 offset = 0; 1268 1309 } 1269 1310
+1 -2
block/blk.h
··· 103 103 void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs); 104 104 105 105 bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, 106 - struct page *page, unsigned len, unsigned offset, 107 - bool *same_page); 106 + struct page *page, unsigned len, unsigned offset); 108 107 109 108 static inline bool biovec_phys_mergeable(struct request_queue *q, 110 109 struct bio_vec *vec1, struct bio_vec *vec2)