Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

staging: vc04_services: add vchiq_pagelist_info structure

The current dma_map_sg based implementation for bulk messages
computes many offsets into a single allocation multiple times in
both the create and free code paths. This is inefficient,
error prone and in fact still has a few lingering issues
with arm64.

This change replaces a small portion of that implementation with
new code that uses a new struct vchiq_pagelist_info to store the
needed information rather than complex offset calculations.

This improved implementation should be more efficient and easier
to understand and maintain.

Tests Run (Both Pass):
vchiq_test -p 1
vchiq_test -f 10

Signed-off-by: Michael Zoran <mzoran@crowfest.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Michael Zoran and committed by
Greg Kroah-Hartman
4807f2c0 4dfc109c

+113 -110
+113 -110
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
··· 62 62 VCHIQ_ARM_STATE_T arm_state; 63 63 } VCHIQ_2835_ARM_STATE_T; 64 64 65 + struct vchiq_pagelist_info { 66 + PAGELIST_T *pagelist; 67 + size_t pagelist_buffer_size; 68 + dma_addr_t dma_addr; 69 + enum dma_data_direction dma_dir; 70 + unsigned int num_pages; 71 + unsigned int pages_need_release; 72 + struct page **pages; 73 + struct scatterlist *scatterlist; 74 + unsigned int scatterlist_mapped; 75 + }; 76 + 65 77 static void __iomem *g_regs; 66 78 static unsigned int g_cache_line_size = sizeof(CACHE_LINE_SIZE); 67 79 static unsigned int g_fragments_size; ··· 89 77 static irqreturn_t 90 78 vchiq_doorbell_irq(int irq, void *dev_id); 91 79 92 - static int 80 + static struct vchiq_pagelist_info * 93 81 create_pagelist(char __user *buf, size_t count, unsigned short type, 94 - struct task_struct *task, PAGELIST_T **ppagelist, 95 - dma_addr_t *dma_addr); 82 + struct task_struct *task); 96 83 97 84 static void 98 - free_pagelist(dma_addr_t dma_addr, PAGELIST_T *pagelist, int actual); 85 + free_pagelist(struct vchiq_pagelist_info *pagelistinfo, 86 + int actual); 99 87 100 88 int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state) 101 89 { ··· 236 224 vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle, 237 225 void *offset, int size, int dir) 238 226 { 239 - PAGELIST_T *pagelist; 240 - int ret; 241 - dma_addr_t dma_addr; 227 + struct vchiq_pagelist_info *pagelistinfo; 242 228 243 229 WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID); 244 230 245 - ret = create_pagelist((char __user *)offset, size, 246 - (dir == VCHIQ_BULK_RECEIVE) 247 - ? PAGELIST_READ 248 - : PAGELIST_WRITE, 249 - current, 250 - &pagelist, 251 - &dma_addr); 231 + pagelistinfo = create_pagelist((char __user *)offset, size, 232 + (dir == VCHIQ_BULK_RECEIVE) 233 + ? 
PAGELIST_READ 234 + : PAGELIST_WRITE, 235 + current); 252 236 253 - if (ret != 0) 237 + if (!pagelistinfo) 254 238 return VCHIQ_ERROR; 255 239 256 240 bulk->handle = memhandle; 257 - bulk->data = (void *)(unsigned long)dma_addr; 241 + bulk->data = (void *)(unsigned long)pagelistinfo->dma_addr; 258 242 259 - /* Store the pagelist address in remote_data, which isn't used by the 260 - slave. */ 261 - bulk->remote_data = pagelist; 243 + /* 244 + * Store the pagelistinfo address in remote_data, 245 + * which isn't used by the slave. 246 + */ 247 + bulk->remote_data = pagelistinfo; 262 248 263 249 return VCHIQ_SUCCESS; 264 250 } ··· 265 255 vchiq_complete_bulk(VCHIQ_BULK_T *bulk) 266 256 { 267 257 if (bulk && bulk->remote_data && bulk->actual) 268 - free_pagelist((dma_addr_t)(unsigned long)bulk->data, 269 - (PAGELIST_T *)bulk->remote_data, bulk->actual); 258 + free_pagelist((struct vchiq_pagelist_info *)bulk->remote_data, 259 + bulk->actual); 270 260 } 271 261 272 262 void ··· 354 344 return ret; 355 345 } 356 346 347 + static void 348 + cleaup_pagelistinfo(struct vchiq_pagelist_info *pagelistinfo) 349 + { 350 + if (pagelistinfo->scatterlist_mapped) { 351 + dma_unmap_sg(g_dev, pagelistinfo->scatterlist, 352 + pagelistinfo->num_pages, pagelistinfo->dma_dir); 353 + } 354 + 355 + if (pagelistinfo->pages_need_release) { 356 + unsigned int i; 357 + 358 + for (i = 0; i < pagelistinfo->num_pages; i++) 359 + put_page(pagelistinfo->pages[i]); 360 + } 361 + 362 + dma_free_coherent(g_dev, pagelistinfo->pagelist_buffer_size, 363 + pagelistinfo->pagelist, pagelistinfo->dma_addr); 364 + } 365 + 357 366 /* There is a potential problem with partial cache lines (pages?) 358 367 ** at the ends of the block when reading. If the CPU accessed anything in 359 368 ** the same line (page?) then it may have pulled old data into the cache, ··· 381 352 ** cached area. 
382 353 */ 383 354 384 - static int 355 + static struct vchiq_pagelist_info * 385 356 create_pagelist(char __user *buf, size_t count, unsigned short type, 386 - struct task_struct *task, PAGELIST_T **ppagelist, 387 - dma_addr_t *dma_addr) 357 + struct task_struct *task) 388 358 { 389 359 PAGELIST_T *pagelist; 360 + struct vchiq_pagelist_info *pagelistinfo; 390 361 struct page **pages; 391 362 u32 *addrs; 392 363 unsigned int num_pages, offset, i, k; 393 364 int actual_pages; 394 - unsigned long *need_release; 395 365 size_t pagelist_size; 396 366 struct scatterlist *scatterlist, *sg; 397 367 int dma_buffers; 398 - int dir; 368 + dma_addr_t dma_addr; 399 369 400 370 offset = ((unsigned int)(unsigned long)buf & (PAGE_SIZE - 1)); 401 371 num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE; 402 372 403 373 pagelist_size = sizeof(PAGELIST_T) + 404 - (num_pages * sizeof(unsigned long)) + 405 - sizeof(unsigned long) + 374 + (num_pages * sizeof(u32)) + 406 375 (num_pages * sizeof(pages[0]) + 407 - (num_pages * sizeof(struct scatterlist))); 408 - 409 - *ppagelist = NULL; 410 - 411 - dir = (type == PAGELIST_WRITE) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; 376 + (num_pages * sizeof(struct scatterlist))) + 377 + sizeof(struct vchiq_pagelist_info); 412 378 413 379 /* Allocate enough storage to hold the page pointers and the page 414 380 ** list 415 381 */ 416 382 pagelist = dma_zalloc_coherent(g_dev, 417 383 pagelist_size, 418 - dma_addr, 384 + &dma_addr, 419 385 GFP_KERNEL); 420 386 421 387 vchiq_log_trace(vchiq_arm_log_level, "create_pagelist - %pK", 422 388 pagelist); 423 389 if (!pagelist) 424 - return -ENOMEM; 390 + return NULL; 425 391 426 - addrs = pagelist->addrs; 427 - need_release = (unsigned long *)(addrs + num_pages); 428 - pages = (struct page **)(addrs + num_pages + 1); 429 - scatterlist = (struct scatterlist *)(pages + num_pages); 392 + addrs = pagelist->addrs; 393 + pages = (struct page **)(addrs + num_pages); 394 + scatterlist = (struct scatterlist *)(pages + num_pages); 395 + pagelistinfo = (struct vchiq_pagelist_info *) 396 + (scatterlist + num_pages); 397 + 398 + pagelist->length = count; 399 + pagelist->type = type; 400 + pagelist->offset = offset; 401 + 402 + /* Populate the fields of the pagelistinfo structure */ 403 + pagelistinfo->pagelist = pagelist; 404 + pagelistinfo->pagelist_buffer_size = pagelist_size; 405 + pagelistinfo->dma_addr = dma_addr; 406 + pagelistinfo->dma_dir = (type == PAGELIST_WRITE) ? 
407 + DMA_TO_DEVICE : DMA_FROM_DEVICE; 408 + pagelistinfo->num_pages = num_pages; 409 + pagelistinfo->pages_need_release = 0; 410 + pagelistinfo->pages = pages; 411 + pagelistinfo->scatterlist = scatterlist; 412 + pagelistinfo->scatterlist_mapped = 0; 430 413 431 414 if (is_vmalloc_addr(buf)) { 432 415 unsigned long length = count; ··· 456 415 length -= bytes; 457 416 off = 0; 458 417 } 459 - *need_release = 0; /* do not try and release vmalloc pages */ 418 + /* do not try and release vmalloc pages */ 460 419 } else { 461 420 down_read(&task->mm->mmap_sem); 462 421 actual_pages = get_user_pages( ··· 479 438 actual_pages--; 480 439 put_page(pages[actual_pages]); 481 440 } 482 - dma_free_coherent(g_dev, pagelist_size, 483 - pagelist, *dma_addr); 484 - if (actual_pages == 0) 485 - actual_pages = -ENOMEM; 486 - return actual_pages; 441 + cleaup_pagelistinfo(pagelistinfo); 442 + return NULL; 487 443 } 488 - *need_release = 1; /* release user pages */ 444 + /* release user pages */ 445 + pagelistinfo->pages_need_release = 1; 489 446 } 490 - 491 - pagelist->length = count; 492 - pagelist->type = type; 493 - pagelist->offset = offset; 494 447 495 448 /* 496 449 * Initialize the scatterlist so that the magic cookie ··· 498 463 dma_buffers = dma_map_sg(g_dev, 499 464 scatterlist, 500 465 num_pages, 501 - dir); 466 + pagelistinfo->dma_dir); 502 467 503 468 if (dma_buffers == 0) { 504 - dma_free_coherent(g_dev, pagelist_size, 505 - pagelist, *dma_addr); 506 - 507 - return -EINTR; 469 + cleaup_pagelistinfo(pagelistinfo); 470 + return NULL; 508 471 } 472 + 473 + pagelistinfo->scatterlist_mapped = 1; 509 474 510 475 /* Combine adjacent blocks for performance */ 511 476 k = 0; ··· 538 503 char *fragments; 539 504 540 505 if (down_interruptible(&g_free_fragments_sema) != 0) { 541 - dma_unmap_sg(g_dev, scatterlist, num_pages, 542 - DMA_BIDIRECTIONAL); 543 - dma_free_coherent(g_dev, pagelist_size, 544 - pagelist, *dma_addr); 545 - return -EINTR; 506 + 
cleaup_pagelistinfo(pagelistinfo); 507 + return NULL; 546 508 } 547 509 548 510 WARN_ON(g_free_fragments == NULL); ··· 553 521 (fragments - g_fragments_base) / g_fragments_size; 554 522 } 555 523 556 - *ppagelist = pagelist; 557 - 558 - return 0; 524 + return pagelistinfo; 559 525 } 560 526 561 527 static void 562 - free_pagelist(dma_addr_t dma_addr, PAGELIST_T *pagelist, int actual) 528 + free_pagelist(struct vchiq_pagelist_info *pagelistinfo, 529 + int actual) 563 530 { 564 - unsigned long *need_release; 565 - struct page **pages; 566 - unsigned int num_pages, i; 567 - size_t pagelist_size; 568 - struct scatterlist *scatterlist; 569 - int dir; 531 + unsigned int i; 532 + PAGELIST_T *pagelist = pagelistinfo->pagelist; 533 + struct page **pages = pagelistinfo->pages; 534 + unsigned int num_pages = pagelistinfo->num_pages; 570 535 571 536 vchiq_log_trace(vchiq_arm_log_level, "free_pagelist - %pK, %d", 572 - pagelist, actual); 537 + pagelistinfo->pagelist, actual); 573 538 574 - dir = (pagelist->type == PAGELIST_WRITE) ? DMA_TO_DEVICE : 575 - DMA_FROM_DEVICE; 576 - 577 - num_pages = 578 - (pagelist->length + pagelist->offset + PAGE_SIZE - 1) / 579 - PAGE_SIZE; 580 - 581 - pagelist_size = sizeof(PAGELIST_T) + 582 - (num_pages * sizeof(unsigned long)) + 583 - sizeof(unsigned long) + 584 - (num_pages * sizeof(pages[0]) + 585 - (num_pages * sizeof(struct scatterlist))); 586 - 587 - need_release = (unsigned long *)(pagelist->addrs + num_pages); 588 - pages = (struct page **)(pagelist->addrs + num_pages + 1); 589 - scatterlist = (struct scatterlist *)(pages + num_pages); 590 - 591 - dma_unmap_sg(g_dev, scatterlist, num_pages, dir); 539 + /* 540 + * NOTE: dma_unmap_sg must be called before the 541 + * cpu can touch any of the data/pages. 
542 + */ 543 + dma_unmap_sg(g_dev, pagelistinfo->scatterlist, 544 + pagelistinfo->num_pages, pagelistinfo->dma_dir); 545 + pagelistinfo->scatterlist_mapped = 0; 592 546 593 547 /* Deal with any partial cache lines (fragments) */ 594 548 if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { ··· 612 594 up(&g_free_fragments_sema); 613 595 } 614 596 615 - if (*need_release) { 616 - unsigned int length = pagelist->length; 617 - unsigned int offset = pagelist->offset; 618 - 619 - for (i = 0; i < num_pages; i++) { 620 - struct page *pg = pages[i]; 621 - 622 - if (pagelist->type != PAGELIST_WRITE) { 623 - unsigned int bytes = PAGE_SIZE - offset; 624 - 625 - if (bytes > length) 626 - bytes = length; 627 - 628 - length -= bytes; 629 - offset = 0; 630 - set_page_dirty(pg); 631 - } 632 - put_page(pg); 633 - } 597 + /* Need to mark all the pages dirty. */ 598 + if (pagelist->type != PAGELIST_WRITE && 599 + pagelistinfo->pages_need_release) { 600 + for (i = 0; i < num_pages; i++) 601 + set_page_dirty(pages[i]); 634 602 } 635 603 636 - dma_free_coherent(g_dev, pagelist_size, 637 - pagelist, dma_addr); 604 + cleaup_pagelistinfo(pagelistinfo); 638 605 }