Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lib/scatterlist: Fix wrong update of orig_nents

orig_nents should represent the number of entries with pages,
but __sg_alloc_table_from_pages sets orig_nents as the number of
total entries in the table. This is wrong when the API is used for
dynamic allocation where not all the table entries are mapped with
pages. It wasn't observed until now, since the RDMA umem code, which
uses this API in the dynamic form, doesn't use orig_nents implicitly
or explicitly through the scatterlist APIs.

Fix it by changing the append API to track the SG append table
state and have an API to free the append table according to the
total number of entries in the table.
Now all APIs set orig_nents as the number of entries with pages.

Fixes: 07da1223ec93 ("lib/scatterlist: Add support in dynamic allocation of SG table from pages")
Link: https://lore.kernel.org/r/20210824142531.3877007-3-maorg@nvidia.com
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

authored by

Maor Gottlieb and committed by
Jason Gunthorpe
3e302dbc 90e7a6de

+135 -92
+19 -15
drivers/infiniband/core/umem.c
··· 59 59 unpin_user_page_range_dirty_lock(sg_page(sg), 60 60 DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty); 61 61 62 - sg_free_table(&umem->sg_head); 62 + sg_free_append_table(&umem->sgt_append); 63 63 } 64 64 65 65 /** ··· 155 155 unsigned long dma_attr = 0; 156 156 struct mm_struct *mm; 157 157 unsigned long npages; 158 - int ret; 159 - struct scatterlist *sg = NULL; 158 + int pinned, ret; 160 159 unsigned int gup_flags = FOLL_WRITE; 161 160 162 161 /* ··· 215 216 216 217 while (npages) { 217 218 cond_resched(); 218 - ret = pin_user_pages_fast(cur_base, 219 + pinned = pin_user_pages_fast(cur_base, 219 220 min_t(unsigned long, npages, 220 221 PAGE_SIZE / 221 222 sizeof(struct page *)), 222 223 gup_flags | FOLL_LONGTERM, page_list); 223 - if (ret < 0) 224 + if (pinned < 0) { 225 + ret = pinned; 224 226 goto umem_release; 227 + } 225 228 226 - cur_base += ret * PAGE_SIZE; 227 - npages -= ret; 228 - sg = sg_alloc_append_table_from_pages(&umem->sg_head, page_list, 229 - ret, 0, ret << PAGE_SHIFT, 230 - ib_dma_max_seg_size(device), sg, npages, 231 - GFP_KERNEL); 232 - umem->sg_nents = umem->sg_head.nents; 233 - if (IS_ERR(sg)) { 234 - unpin_user_pages_dirty_lock(page_list, ret, 0); 235 - ret = PTR_ERR(sg); 229 + cur_base += pinned * PAGE_SIZE; 230 + npages -= pinned; 231 + ret = sg_alloc_append_table_from_pages( 232 + &umem->sgt_append, page_list, pinned, 0, 233 + pinned << PAGE_SHIFT, ib_dma_max_seg_size(device), 234 + npages, GFP_KERNEL); 235 + umem->sg_nents = umem->sgt_append.sgt.nents; 236 + if (ret) { 237 + memcpy(&umem->sg_head.sgl, &umem->sgt_append.sgt, 238 + sizeof(umem->sgt_append.sgt)); 239 + unpin_user_pages_dirty_lock(page_list, pinned, 0); 236 240 goto umem_release; 237 241 } 238 242 } 239 243 244 + memcpy(&umem->sg_head.sgl, &umem->sgt_append.sgt, 245 + sizeof(umem->sgt_append.sgt)); 240 246 if (access & IB_ACCESS_RELAXED_ORDERING) 241 247 dma_attr |= DMA_ATTR_WEAK_ORDERING; 242 248
+13 -6
include/linux/scatterlist.h
··· 39 39 unsigned int orig_nents; /* original size of list */ 40 40 }; 41 41 42 + struct sg_append_table { 43 + struct sg_table sgt; /* The scatter list table */ 44 + struct scatterlist *prv; /* last populated sge in the table */ 45 + unsigned int total_nents; /* Total entries in the table */ 46 + }; 47 + 42 48 /* 43 49 * Notes on SG table design. 44 50 * ··· 286 280 typedef void (sg_free_fn)(struct scatterlist *, unsigned int); 287 281 288 282 void __sg_free_table(struct sg_table *, unsigned int, unsigned int, 289 - sg_free_fn *); 283 + sg_free_fn *, unsigned int); 290 284 void sg_free_table(struct sg_table *); 285 + void sg_free_append_table(struct sg_append_table *sgt); 291 286 int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, 292 287 struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); 293 288 int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); 294 - struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, 295 - struct page **pages, unsigned int n_pages, unsigned int offset, 296 - unsigned long size, unsigned int max_segment, 297 - struct scatterlist *prv, unsigned int left_pages, 298 - gfp_t gfp_mask); 289 + int sg_alloc_append_table_from_pages(struct sg_append_table *sgt, 290 + struct page **pages, unsigned int n_pages, 291 + unsigned int offset, unsigned long size, 292 + unsigned int max_segment, 293 + unsigned int left_pages, gfp_t gfp_mask); 299 294 int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, 300 295 unsigned int n_pages, unsigned int offset, 301 296 unsigned long size,
+1
include/rdma/ib_umem.h
··· 26 26 u32 is_odp : 1; 27 27 u32 is_dmabuf : 1; 28 28 struct work_struct work; 29 + struct sg_append_table sgt_append; 29 30 struct sg_table sg_head; 30 31 int nmap; 31 32 unsigned int sg_nents;
+77 -50
lib/scatterlist.c
··· 182 182 * @nents_first_chunk: Number of entries int the (preallocated) first 183 183 * scatterlist chunk, 0 means no such preallocated first chunk 184 184 * @free_fn: Free function 185 + * @num_ents: Number of entries in the table 185 186 * 186 187 * Description: 187 188 * Free an sg table previously allocated and setup with ··· 191 190 * 192 191 **/ 193 192 void __sg_free_table(struct sg_table *table, unsigned int max_ents, 194 - unsigned int nents_first_chunk, sg_free_fn *free_fn) 193 + unsigned int nents_first_chunk, sg_free_fn *free_fn, 194 + unsigned int num_ents) 195 195 { 196 196 struct scatterlist *sgl, *next; 197 197 unsigned curr_max_ents = nents_first_chunk ?: max_ents; ··· 201 199 return; 202 200 203 201 sgl = table->sgl; 204 - while (table->orig_nents) { 205 - unsigned int alloc_size = table->orig_nents; 202 + while (num_ents) { 203 + unsigned int alloc_size = num_ents; 206 204 unsigned int sg_size; 207 205 208 206 /* ··· 220 218 next = NULL; 221 219 } 222 220 223 - table->orig_nents -= sg_size; 221 + num_ents -= sg_size; 224 222 if (nents_first_chunk) 225 223 nents_first_chunk = 0; 226 224 else ··· 234 232 EXPORT_SYMBOL(__sg_free_table); 235 233 236 234 /** 235 + * sg_free_append_table - Free a previously allocated append sg table. 
236 + * @table: The mapped sg append table header 237 + * 238 + **/ 239 + void sg_free_append_table(struct sg_append_table *table) 240 + { 241 + __sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, false, sg_kfree, 242 + table->total_nents); 243 + } 244 + EXPORT_SYMBOL(sg_free_append_table); 245 + 246 + 247 + /** 237 248 * sg_free_table - Free a previously allocated sg table 238 249 * @table: The mapped sg table header 239 250 * 240 251 **/ 241 252 void sg_free_table(struct sg_table *table) 242 253 { 243 - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree); 254 + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree, 255 + table->orig_nents); 244 256 } 245 257 EXPORT_SYMBOL(sg_free_table); 246 258 ··· 375 359 ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC, 376 360 NULL, 0, gfp_mask, sg_kmalloc); 377 361 if (unlikely(ret)) 378 - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree); 379 - 362 + sg_free_table(table); 380 363 return ret; 381 364 } 382 365 EXPORT_SYMBOL(sg_alloc_table); 383 366 384 - static struct scatterlist *get_next_sg(struct sg_table *table, 367 + static struct scatterlist *get_next_sg(struct sg_append_table *table, 385 368 struct scatterlist *cur, 386 369 unsigned long needed_sges, 387 370 gfp_t gfp_mask) ··· 401 386 return ERR_PTR(-ENOMEM); 402 387 sg_init_table(new_sg, alloc_size); 403 388 if (cur) { 389 + table->total_nents += alloc_size - 1; 404 390 __sg_chain(next_sg, new_sg); 405 - table->orig_nents += alloc_size - 1; 406 391 } else { 407 - table->sgl = new_sg; 408 - table->orig_nents = alloc_size; 409 - table->nents = 0; 392 + table->sgt.sgl = new_sg; 393 + table->total_nents = alloc_size; 410 394 } 411 395 return new_sg; 412 396 } 413 397 414 398 /** 415 - * sg_alloc_append_table_from_pages - Allocate and initialize an sg table from 416 - * an array of pages 417 - * @sgt: The sg table header to use 418 - * @pages: Pointer to an array of page pointers 419 - * @n_pages: Number of pages in the pages array 399 + * 
sg_alloc_append_table_from_pages - Allocate and initialize an append sg 400 + * table from an array of pages 401 + * @sgt_append: The sg append table to use 402 + * @pages: Pointer to an array of page pointers 403 + * @n_pages: Number of pages in the pages array 420 404 * @offset: Offset from start of the first page to the start of a buffer 421 405 * @size: Number of valid bytes in the buffer (after offset) 422 406 * @max_segment: Maximum size of a scatterlist element in bytes 423 - * @prv: Last populated sge in sgt 424 407 * @left_pages: Left pages caller have to set after this call 425 408 * @gfp_mask: GFP allocation mask 426 409 * 427 410 * Description: 428 - * If @prv is NULL, allocate and initialize an sg table from a list of pages, 429 - * else reuse the scatterlist passed in at @prv. 430 - * Contiguous ranges of the pages are squashed into a single scatterlist 431 - * entry up to the maximum size specified in @max_segment. A user may 432 - * provide an offset at a start and a size of valid data in a buffer 433 - * specified by the page array. 411 + * In the first call it allocate and initialize an sg table from a list of 412 + * pages, else reuse the scatterlist from sgt_append. Contiguous ranges of 413 + * the pages are squashed into a single scatterlist entry up to the maximum 414 + * size specified in @max_segment. A user may provide an offset at a start 415 + * and a size of valid data in a buffer specified by the page array. The 416 + * returned sg table is released by sg_free_append_table 434 417 * 435 418 * Returns: 436 - * Last SGE in sgt on success, PTR_ERR on otherwise. 437 - * The allocation in @sgt must be released by sg_free_table. 419 + * 0 on success, negative error on failure 438 420 * 439 421 * Notes: 440 422 * If this function returns non-0 (eg failure), the caller must call 441 - * sg_free_table() to cleanup any leftover allocations. 423 + * sg_free_append_table() to cleanup any leftover allocations. 
424 + * 425 + * In the fist call, sgt_append must by initialized. 442 426 */ 443 - struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, 427 + int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, 444 428 struct page **pages, unsigned int n_pages, unsigned int offset, 445 429 unsigned long size, unsigned int max_segment, 446 - struct scatterlist *prv, unsigned int left_pages, 447 - gfp_t gfp_mask) 430 + unsigned int left_pages, gfp_t gfp_mask) 448 431 { 449 432 unsigned int chunks, cur_page, seg_len, i, prv_len = 0; 450 433 unsigned int added_nents = 0; 451 - struct scatterlist *s = prv; 434 + struct scatterlist *s = sgt_append->prv; 452 435 453 436 /* 454 437 * The algorithm below requires max_segment to be aligned to PAGE_SIZE ··· 454 441 */ 455 442 max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE); 456 443 if (WARN_ON(max_segment < PAGE_SIZE)) 457 - return ERR_PTR(-EINVAL); 444 + return -EINVAL; 458 445 459 - if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv) 460 - return ERR_PTR(-EOPNOTSUPP); 446 + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv) 447 + return -EOPNOTSUPP; 461 448 462 - if (prv) { 463 - unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE + 464 - prv->offset + prv->length) / 465 - PAGE_SIZE; 449 + if (sgt_append->prv) { 450 + unsigned long paddr = 451 + (page_to_pfn(sg_page(sgt_append->prv)) * PAGE_SIZE + 452 + sgt_append->prv->offset + sgt_append->prv->length) / 453 + PAGE_SIZE; 466 454 467 455 if (WARN_ON(offset)) 468 - return ERR_PTR(-EINVAL); 456 + return -EINVAL; 469 457 470 458 /* Merge contiguous pages into the last SG */ 471 - prv_len = prv->length; 459 + prv_len = sgt_append->prv->length; 472 460 while (n_pages && page_to_pfn(pages[0]) == paddr) { 473 - if (prv->length + PAGE_SIZE > max_segment) 461 + if (sgt_append->prv->length + PAGE_SIZE > max_segment) 474 462 break; 475 - prv->length += PAGE_SIZE; 463 + sgt_append->prv->length += PAGE_SIZE; 476 464 paddr++; 477 465 pages++; 478 
466 n_pages--; ··· 510 496 } 511 497 512 498 /* Pass how many chunks might be left */ 513 - s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask); 499 + s = get_next_sg(sgt_append, s, chunks - i + left_pages, 500 + gfp_mask); 514 501 if (IS_ERR(s)) { 515 502 /* 516 503 * Adjust entry length to be as before function was 517 504 * called. 518 505 */ 519 - if (prv) 520 - prv->length = prv_len; 521 - return s; 506 + if (sgt_append->prv) 507 + sgt_append->prv->length = prv_len; 508 + return PTR_ERR(s); 522 509 } 523 510 chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset; 524 511 sg_set_page(s, pages[cur_page], ··· 529 514 offset = 0; 530 515 cur_page = j; 531 516 } 532 - sgt->nents += added_nents; 517 + sgt_append->sgt.nents += added_nents; 518 + sgt_append->sgt.orig_nents = sgt_append->sgt.nents; 519 + sgt_append->prv = s; 533 520 out: 534 521 if (!left_pages) 535 522 sg_mark_end(s); 536 - return s; 523 + return 0; 537 524 } 538 525 EXPORT_SYMBOL(sg_alloc_append_table_from_pages); 539 526 ··· 567 550 unsigned long size, unsigned int max_segment, 568 551 gfp_t gfp_mask) 569 552 { 570 - return PTR_ERR_OR_ZERO(sg_alloc_append_table_from_pages(sgt, pages, 571 - n_pages, offset, size, max_segment, NULL, 0, gfp_mask)); 553 + struct sg_append_table append = {}; 554 + int err; 555 + 556 + err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset, 557 + size, max_segment, 0, gfp_mask); 558 + if (err) { 559 + sg_free_append_table(&append); 560 + return err; 561 + } 562 + memcpy(sgt, &append.sgt, sizeof(*sgt)); 563 + WARN_ON(append.total_nents != sgt->orig_nents); 564 + return 0; 572 565 } 573 566 EXPORT_SYMBOL(sg_alloc_table_from_pages_segment); 574 567
+2 -1
lib/sg_pool.c
··· 90 90 if (nents_first_chunk == 1) 91 91 nents_first_chunk = 0; 92 92 93 - __sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free); 93 + __sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free, 94 + table->orig_nents); 94 95 } 95 96 EXPORT_SYMBOL_GPL(sg_free_table_chained); 96 97
+23 -20
tools/testing/scatterlist/main.c
··· 85 85 86 86 for (i = 0, test = tests; test->expected_segments; test++, i++) { 87 87 int left_pages = test->pfn_app ? test->num_pages : 0; 88 + struct sg_append_table append = {}; 88 89 struct page *pages[MAX_PAGES]; 89 - struct sg_table st; 90 - struct scatterlist *sg = NULL; 91 90 int ret; 92 91 93 92 set_pages(pages, test->pfn, test->num_pages); 94 93 95 - if (test->pfn_app) { 96 - sg = sg_alloc_append_table_from_pages( 97 - &st, pages, test->num_pages, 0, test->size, 98 - test->max_seg, NULL, left_pages, GFP_KERNEL); 99 - assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); 100 - } else { 94 + if (test->pfn_app) 95 + ret = sg_alloc_append_table_from_pages( 96 + &append, pages, test->num_pages, 0, test->size, 97 + test->max_seg, left_pages, GFP_KERNEL); 98 + else 101 99 ret = sg_alloc_table_from_pages_segment( 102 - &st, pages, test->num_pages, 0, test->size, 103 - test->max_seg, GFP_KERNEL); 104 - assert(ret == test->alloc_ret); 105 - } 100 + &append.sgt, pages, test->num_pages, 0, 101 + test->size, test->max_seg, GFP_KERNEL); 102 + 103 + assert(ret == test->alloc_ret); 106 104 107 105 if (test->alloc_ret) 108 106 continue; 109 107 110 108 if (test->pfn_app) { 111 109 set_pages(pages, test->pfn_app, test->num_pages); 112 - sg = sg_alloc_append_table_from_pages( 113 - &st, pages, test->num_pages, 0, test->size, 114 - test->max_seg, sg, 0, GFP_KERNEL); 110 + ret = sg_alloc_append_table_from_pages( 111 + &append, pages, test->num_pages, 0, test->size, 112 + test->max_seg, 0, GFP_KERNEL); 115 113 116 - assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); 114 + assert(ret == test->alloc_ret); 117 115 } 118 116 119 - VALIDATE(st.nents == test->expected_segments, &st, test); 117 + VALIDATE(append.sgt.nents == test->expected_segments, 118 + &append.sgt, test); 120 119 if (!test->pfn_app) 121 - VALIDATE(st.orig_nents == test->expected_segments, &st, 122 - test); 120 + VALIDATE(append.sgt.orig_nents == 121 + test->expected_segments, 122 + &append.sgt, test); 123 123 124 - 
sg_free_table(&st); 124 + if (test->pfn_app) 125 + sg_free_append_table(&append); 126 + else 127 + sg_free_table(&append.sgt); 125 128 } 126 129 127 130 assert(i == (sizeof(tests) / sizeof(tests[0])) - 1);