Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dma-buf: system_heap: Allocate higher order pages if available

While the system heap can return non-contiguous pages,
try to allocate larger order pages if possible.

This will allow slight performance gains and make implementing
page pooling easier.

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Brian Starkey <brian.starkey@arm.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20201121235002.69945-6-john.stultz@linaro.org

Authored by John Stultz; committed by Sumit Semwal.
Commit: d963ab0f (parent 4c68e499)

Diffstat: +71 -18
drivers/dma-buf/heaps/system_heap.c
··· 40 40 bool mapped; 41 41 }; 42 42 43 + #define HIGH_ORDER_GFP (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \ 44 + | __GFP_NORETRY) & ~__GFP_RECLAIM) \ 45 + | __GFP_COMP) 46 + #define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO | __GFP_COMP) 47 + static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, LOW_ORDER_GFP}; 48 + /* 49 + * The selection of the orders used for allocation (1MB, 64K, 4K) is designed 50 + * to match with the sizes often found in IOMMUs. Using order 4 pages instead 51 + * of order 0 pages can significantly improve the performance of many IOMMUs 52 + * by reducing TLB pressure and time spent updating page tables. 53 + */ 54 + static const unsigned int orders[] = {8, 4, 0}; 55 + #define NUM_ORDERS ARRAY_SIZE(orders) 56 + 43 57 static struct sg_table *dup_sg_table(struct sg_table *table) 44 58 { 45 59 struct sg_table *new_table; ··· 289 275 int i; 290 276 291 277 table = &buffer->sg_table; 292 - for_each_sgtable_sg(table, sg, i) 293 - __free_page(sg_page(sg)); 278 + for_each_sg(table->sgl, sg, table->nents, i) { 279 + struct page *page = sg_page(sg); 280 + 281 + __free_pages(page, compound_order(page)); 282 + } 294 283 sg_free_table(table); 295 284 kfree(buffer); 296 285 } ··· 311 294 .release = system_heap_dma_buf_release, 312 295 }; 313 296 297 + static struct page *alloc_largest_available(unsigned long size, 298 + unsigned int max_order) 299 + { 300 + struct page *page; 301 + int i; 302 + 303 + for (i = 0; i < NUM_ORDERS; i++) { 304 + if (size < (PAGE_SIZE << orders[i])) 305 + continue; 306 + if (max_order < orders[i]) 307 + continue; 308 + 309 + page = alloc_pages(order_flags[i], orders[i]); 310 + if (!page) 311 + continue; 312 + return page; 313 + } 314 + return NULL; 315 + } 316 + 314 317 static int system_heap_allocate(struct dma_heap *heap, 315 318 unsigned long len, 316 319 unsigned long fd_flags, ··· 338 301 { 339 302 struct system_heap_buffer *buffer; 340 303 DEFINE_DMA_BUF_EXPORT_INFO(exp_info); 304 + unsigned long size_remaining = 
len; 305 + unsigned int max_order = orders[0]; 341 306 struct dma_buf *dmabuf; 342 307 struct sg_table *table; 343 308 struct scatterlist *sg; 344 - pgoff_t pagecount; 345 - pgoff_t pg; 309 + struct list_head pages; 310 + struct page *page, *tmp_page; 346 311 int i, ret = -ENOMEM; 347 312 348 313 buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); ··· 356 317 buffer->heap = heap; 357 318 buffer->len = len; 358 319 359 - table = &buffer->sg_table; 360 - pagecount = len / PAGE_SIZE; 361 - if (sg_alloc_table(table, pagecount, GFP_KERNEL)) 362 - goto free_buffer; 363 - 364 - sg = table->sgl; 365 - for (pg = 0; pg < pagecount; pg++) { 366 - struct page *page; 320 + INIT_LIST_HEAD(&pages); 321 + i = 0; 322 + while (size_remaining > 0) { 367 323 /* 368 324 * Avoid trying to allocate memory if the process 369 325 * has been killed by SIGKILL 370 326 */ 371 327 if (fatal_signal_pending(current)) 372 - goto free_pages; 373 - page = alloc_page(GFP_KERNEL | __GFP_ZERO); 328 + goto free_buffer; 329 + 330 + page = alloc_largest_available(size_remaining, max_order); 374 331 if (!page) 375 - goto free_pages; 332 + goto free_buffer; 333 + 334 + list_add_tail(&page->lru, &pages); 335 + size_remaining -= page_size(page); 336 + max_order = compound_order(page); 337 + i++; 338 + } 339 + 340 + table = &buffer->sg_table; 341 + if (sg_alloc_table(table, i, GFP_KERNEL)) 342 + goto free_buffer; 343 + 344 + sg = table->sgl; 345 + list_for_each_entry_safe(page, tmp_page, &pages, lru) { 376 346 sg_set_page(sg, page, page_size(page), 0); 377 347 sg = sg_next(sg); 348 + list_del(&page->lru); 378 349 } 379 350 380 351 /* create the dmabuf */ ··· 404 355 /* just return, as put will call release and that will free */ 405 356 return ret; 406 357 } 407 - 408 358 return ret; 409 359 410 360 free_pages: 411 - for_each_sgtable_sg(table, sg, i) 412 - __free_page(sg_page(sg)); 361 + for_each_sgtable_sg(table, sg, i) { 362 + struct page *p = sg_page(sg); 363 + 364 + __free_pages(p, compound_order(p)); 365 + } 
413 366 sg_free_table(table); 414 367 free_buffer: 368 + list_for_each_entry_safe(page, tmp_page, &pages, lru) 369 + __free_pages(page, compound_order(page)); 415 370 kfree(buffer); 416 371 417 372 return ret;