Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

brd: use page reference to protect page lifetime

As discussed [1], holding the RCU read lock while copying data from/to the
page is too heavy-handed; it is better to take RCU only around the page
lookup and then grab a reference to prevent the page from being freed by a
concurrent discard.

[1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20250811065628.1829339-1-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Yu Kuai and committed by
Jens Axboe
2a061452 4c7ef92f

+48 -27
+48 -27
drivers/block/brd.c
··· 44 44 }; 45 45 46 46 /* 47 - * Look up and return a brd's page for a given sector. 47 + * Look up and return a brd's page with reference grabbed for a given sector. 48 48 */ 49 49 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) 50 50 { 51 - return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); 51 + struct page *page; 52 + XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); 53 + 54 + rcu_read_lock(); 55 + repeat: 56 + page = xas_load(&xas); 57 + if (xas_retry(&xas, page)) { 58 + xas_reset(&xas); 59 + goto repeat; 60 + } 61 + 62 + if (!page) 63 + goto out; 64 + 65 + if (!get_page_unless_zero(page)) { 66 + xas_reset(&xas); 67 + goto repeat; 68 + } 69 + 70 + if (unlikely(page != xas_reload(&xas))) { 71 + put_page(page); 72 + xas_reset(&xas); 73 + goto repeat; 74 + } 75 + out: 76 + rcu_read_unlock(); 77 + 78 + return page; 52 79 } 53 80 54 81 /* 55 82 * Insert a new page for a given sector, if one does not already exist. 83 + * The returned page will grab reference. 56 84 */ 57 85 static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, 58 86 blk_opf_t opf) 59 - __releases(rcu) 60 - __acquires(rcu) 61 87 { 62 88 gfp_t gfp = (opf & REQ_NOWAIT) ? 
GFP_NOWAIT : GFP_NOIO; 63 89 struct page *page, *ret; 64 90 65 - rcu_read_unlock(); 66 91 page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); 67 - if (!page) { 68 - rcu_read_lock(); 92 + if (!page) 69 93 return ERR_PTR(-ENOMEM); 70 - } 71 94 72 95 xa_lock(&brd->brd_pages); 73 96 ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, 74 97 page, gfp); 75 - rcu_read_lock(); 76 - if (ret) { 98 + if (!ret) { 99 + brd->brd_nr_pages++; 100 + get_page(page); 77 101 xa_unlock(&brd->brd_pages); 78 - __free_page(page); 79 - if (xa_is_err(ret)) 80 - return ERR_PTR(xa_err(ret)); 102 + return page; 103 + } 104 + 105 + if (!xa_is_err(ret)) { 106 + get_page(ret); 107 + xa_unlock(&brd->brd_pages); 108 + put_page(page); 81 109 return ret; 82 110 } 83 - brd->brd_nr_pages++; 111 + 84 112 xa_unlock(&brd->brd_pages); 85 - return page; 113 + put_page(page); 114 + return ERR_PTR(xa_err(ret)); 86 115 } 87 116 88 117 /* ··· 124 95 pgoff_t idx; 125 96 126 97 xa_for_each(&brd->brd_pages, idx, page) { 127 - __free_page(page); 98 + put_page(page); 128 99 cond_resched(); 129 100 } 130 101 ··· 146 117 147 118 bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 148 119 149 - rcu_read_lock(); 150 120 page = brd_lookup_page(brd, sector); 151 121 if (!page && op_is_write(opf)) { 152 122 page = brd_insert_page(brd, sector, opf); ··· 163 135 memset(kaddr, 0, bv.bv_len); 164 136 } 165 137 kunmap_local(kaddr); 166 - rcu_read_unlock(); 167 138 168 139 bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len); 140 + if (page) 141 + put_page(page); 169 142 return true; 170 143 171 144 out_error: 172 - rcu_read_unlock(); 173 145 if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT)) 174 146 bio_wouldblock_error(bio); 175 147 else 176 148 bio_io_error(bio); 177 149 return false; 178 - } 179 - 180 - static void brd_free_one_page(struct rcu_head *head) 181 - { 182 - struct page *page = container_of(head, struct page, rcu_head); 183 - 184 - __free_page(page); 185 150 } 186 151 187 152 static void 
brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) ··· 191 170 while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) { 192 171 page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); 193 172 if (page) { 194 - call_rcu(&page->rcu_head, brd_free_one_page); 173 + put_page(page); 195 174 brd->brd_nr_pages--; 196 175 } 197 176 aligned_sector += PAGE_SECTORS;