Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at nocache-cleanup 437 lines 13 kB view raw
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

/* One physically contiguous chunk of request payload. */
struct phys_vec {
	phys_addr_t paddr;
	u32 len;
};

/*
 * Advance @iter to the next bio in the request chain once the current
 * bio's bvec iterator is exhausted.  Returns %false when there is no
 * further data to iterate.
 */
static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
	if (iter->iter.bi_size)
		return true;
	if (!iter->bio || !iter->bio->bi_next)
		return false;

	iter->bio = iter->bio->bi_next;
	if (iter->is_integrity) {
		iter->iter = bio_integrity(iter->bio)->bip_iter;
		iter->bvecs = bio_integrity(iter->bio)->bip_vec;
	} else {
		iter->iter = iter->bio->bi_iter;
		iter->bvecs = iter->bio->bi_io_vec;
	}
	return true;
}

/*
 * Fill @vec with the next physically contiguous segment of @req, merging
 * adjacent bvecs (possibly across bios) as far as the queue's segment
 * limits allow.  Returns %false when the request is fully iterated.
 */
static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
		struct phys_vec *vec)
{
	unsigned int max_size;
	struct bio_vec bv;

	if (!iter->iter.bi_size)
		return false;

	bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
	vec->paddr = bvec_phys(&bv);
	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
	bv.bv_len = min(bv.bv_len, max_size);
	bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);

	/*
	 * If we are entirely done with this bi_io_vec entry, check if the next
	 * one could be merged into it.  This typically happens when moving to
	 * the next bio, but some callers also don't pack bvecs tight.
	 */
	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
		struct bio_vec next;

		if (!__blk_map_iter_next(iter))
			break;

		next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
		if (bv.bv_len + next.bv_len > max_size ||
		    !biovec_phys_mergeable(req->q, &bv, &next))
			break;

		bv.bv_len += next.bv_len;
		bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
	}

	vec->len = bv.bv_len;
	return true;
}

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
 * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
 * we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA based
 * path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !(req_phys_gap_mask(req) & dma_get_merge_boundary(dma_dev));
}

/* Map one segment as a PCI P2P bus address (no IOMMU / DMA API mapping). */
static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = pci_p2pdma_bus_addr_map(iter->p2pdma.mem, vec->paddr);
	iter->len = vec->len;
	return true;
}

/*
 * Map one segment through the regular (non-IOVA) DMA API.  On failure the
 * error is reported through iter->status and %false is returned.
 */
static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter, struct phys_vec *vec)
{
	unsigned int attrs = 0;

	/* P2P through the host bridge targets MMIO, not cacheable memory */
	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
			rq_dma_dir(req), attrs);
	if (dma_mapping_error(dma_dev, iter->addr)) {
		iter->status = BLK_STS_RESOURCE;
		return false;
	}
	iter->len = vec->len;
	return true;
}

/*
 * Link all segments of @req into the pre-allocated IOVA range in @state,
 * producing a single coalesced DMA mapping in iter->addr/iter->len.
 */
static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		struct phys_vec *vec)
{
	enum dma_data_direction dir = rq_dma_dir(req);
	unsigned int mapped = 0;
	unsigned int attrs = 0;
	int error;

	iter->addr = state->addr;
	iter->len = dma_iova_size(state);

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	do {
		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
				vec->len, dir, attrs);
		if (error)
			break;
		mapped += vec->len;
	} while (blk_map_iter_next(req, &iter->iter, vec));

	/*
	 * NOTE(review): a dma_iova_link() failure only terminates the loop;
	 * the link error itself is discarded and only a dma_iova_sync()
	 * failure is propagated, so a partially linked range can still
	 * return %true here — confirm this is the intended contract.
	 */
	error = dma_iova_sync(dma_dev, state, 0, mapped);
	if (error) {
		iter->status = errno_to_blk_status(error);
		return false;
	}

	return true;
}

/* Initialize @iter to walk the data payload of @rq. */
static inline void blk_rq_map_iter_init(struct request *rq,
		struct blk_map_iter *iter)
{
	struct bio *bio = rq->bio;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		*iter = (struct blk_map_iter) {
			.bvecs = &rq->special_vec,
			.iter = {
				.bi_size = rq->special_vec.bv_len,
			}
		};
	} else if (bio) {
		*iter = (struct blk_map_iter) {
			.bio = bio,
			.bvecs = bio->bi_io_vec,
			.iter = bio->bi_iter,
		};
	} else {
		/* the internal flush request may not have bio attached */
		*iter = (struct blk_map_iter) {};
	}
}

/*
 * Common start-of-mapping logic for data and integrity payloads: classify
 * the transfer (P2P bus address, P2P through host bridge, or regular) and
 * map the first segment via the IOVA or direct path.
 */
static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		unsigned int total_len)
{
	struct phys_vec vec;

	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;
	iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
				 phys_to_page(vec.paddr))) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		return blk_dma_map_bus(iter, &vec);
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		/*
		 * P2P transfers through the host bridge are treated the
		 * same as non-P2P transfers below and during unmap.
		 */
	case PCI_P2PDMA_MAP_NONE:
		break;
	default:
		iter->status = BLK_STS_INVAL;
		return false;
	}

	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
	/* zeroed state tells the unmap path no IOVA allocation was made */
	memset(state, 0, sizeof(*state));
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req:	request to map
 * @dma_dev:	device to map to
 * @state:	DMA IOVA state
 * @iter:	block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev.  @state and @iter are provided by the
 * caller and don't need to be initialized.  @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	blk_rq_map_iter_init(req, &iter->iter);
	return blk_dma_map_iter_start(req, dma_dev, state, iter,
			blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req:	request to map
 * @dma_dev:	device to map to
 * @state:	DMA IOVA state
 * @iter:	block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start().  See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);

/*
 * Return the scatterlist entry to fill next: the head of @sglist on the
 * first call, otherwise the entry after *@sg with its termination bit
 * cleared.
 */
static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping.  We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

/*
 * Map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		struct scatterlist **last_sg)
{
	struct blk_map_iter iter;
	struct phys_vec vec;
	int nsegs = 0;

	blk_rq_map_iter_init(rq, &iter);
	while (blk_map_iter_next(rq, &iter, &vec)) {
		*last_sg = blk_next_sg(last_sg, sglist);
		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		nsegs++;
	}

	if (*last_sg)
		sg_mark_end(*last_sg);

	/*
	 * Something must have been wrong if the figured number of
	 * segment is bigger than number of req's physical segments
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);

#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
 *	for a request
 * @req:	request to map
 * @dma_dev:	device to map to
 * @state:	DMA IOVA state
 * @iter:	block layer DMA iterator
 *
 * Start DMA mapping @req integrity data to @dma_dev.  @state and @iter are
 * provided by the caller and don't need to be initialized.  @state needs to be
 * stored for use at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
		struct device *dma_dev, struct dma_iova_state *state,
		struct blk_dma_iter *iter)
{
	unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
			blk_rq_sectors(req));
	struct bio *bio = req->bio;

	iter->iter = (struct blk_map_iter) {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};
	return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);

/**
 * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
 *	a request
 * @req:	request to map
 * @dma_dev:	device to map to
 * @iter:	block layer DMA iterator
 *
 * Iterate to the next integrity mapping after a previous call to
 * blk_rq_integrity_dma_map_iter_start().  See there for a detailed description
 * of the arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
		struct device *dma_dev, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);

/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq:		request to map
 * @sglist:	target scatterlist
 *
 * Description: Map the integrity vectors in request into a
 * scatterlist.  The scatterlist must be big enough to hold all
 * elements.  I.e. sized using blk_rq_count_integrity_sg() or
 * rq->nr_integrity_segments.
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
	struct request_queue *q = rq->q;
	struct scatterlist *sg = NULL;
	struct bio *bio = rq->bio;
	unsigned int segments = 0;
	struct phys_vec vec;

	struct blk_map_iter iter = {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};

	while (blk_map_iter_next(rq, &iter, &vec)) {
		sg = blk_next_sg(&sg, sglist);
		sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		segments++;
	}

	if (sg)
		sg_mark_end(sg);

	/*
	 * Something must have been wrong if the figured number of segment
	 * is bigger than number of req's physical integrity segments
	 */
	BUG_ON(segments > rq->nr_integrity_segments);
	BUG_ON(segments > queue_max_integrity_segments(q));
	return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
#endif