Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

media: cedrus: Don't kernel map most buffers

Except for the VP8 probability coefficients buffer, all other buffers are
never accessed by the CPU. That allows us to mark them with the
DMA_ATTR_NO_KERNEL_MAPPING flag. This helps with decoding big (like 4k)
videos on 32-bit ARM platforms where the default vmalloc size is relatively
small - 240 MiB. Since auxiliary buffers are not yet efficiently
allocated, this can be easily exceeded. Even if allocation is optimized,
4k videos will still often exceed this limit.

Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

authored by

Jernej Skrabec and committed by
Mauro Carvalho Chehab
5db127a5 0887e9e1

+82 -62
+64 -49
drivers/staging/media/sunxi/cedrus/cedrus_h264.c
··· 520 520 unsigned int mv_col_size; 521 521 int ret; 522 522 523 + /* 524 + * NOTE: All buffers allocated here are only used by HW, so we 525 + * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them. 526 + */ 527 + 523 528 /* Formula for picture buffer size is taken from CedarX source. */ 524 529 525 530 if (ctx->src_fmt.width > 2048) ··· 543 538 544 539 ctx->codec.h264.pic_info_buf_size = pic_info_size; 545 540 ctx->codec.h264.pic_info_buf = 546 - dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, 547 - &ctx->codec.h264.pic_info_buf_dma, 548 - GFP_KERNEL); 541 + dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 542 + &ctx->codec.h264.pic_info_buf_dma, 543 + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 549 544 if (!ctx->codec.h264.pic_info_buf) 550 545 return -ENOMEM; 551 546 552 547 /* 553 548 * That buffer is supposed to be 16kiB in size, and be aligned 554 - * on 16kiB as well. However, dma_alloc_coherent provides the 555 - * guarantee that we'll have a CPU and DMA address aligned on 556 - * the smallest page order that is greater to the requested 557 - * size, so we don't have to overallocate. 549 + * on 16kiB as well. However, dma_alloc_attrs provides the 550 + * guarantee that we'll have a DMA address aligned on the 551 + * smallest page order that is greater to the requested size, 552 + * so we don't have to overallocate. 
558 553 */ 559 554 ctx->codec.h264.neighbor_info_buf = 560 - dma_alloc_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 561 - &ctx->codec.h264.neighbor_info_buf_dma, 562 - GFP_KERNEL); 555 + dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 556 + &ctx->codec.h264.neighbor_info_buf_dma, 557 + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 563 558 if (!ctx->codec.h264.neighbor_info_buf) { 564 559 ret = -ENOMEM; 565 560 goto err_pic_buf; ··· 587 582 588 583 mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM; 589 584 ctx->codec.h264.mv_col_buf_size = mv_col_size; 590 - ctx->codec.h264.mv_col_buf = dma_alloc_coherent(dev->dev, 591 - ctx->codec.h264.mv_col_buf_size, 592 - &ctx->codec.h264.mv_col_buf_dma, 593 - GFP_KERNEL); 585 + ctx->codec.h264.mv_col_buf = 586 + dma_alloc_attrs(dev->dev, 587 + ctx->codec.h264.mv_col_buf_size, 588 + &ctx->codec.h264.mv_col_buf_dma, 589 + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 594 590 if (!ctx->codec.h264.mv_col_buf) { 595 591 ret = -ENOMEM; 596 592 goto err_neighbor_buf; ··· 606 600 ctx->codec.h264.deblk_buf_size = 607 601 ALIGN(ctx->src_fmt.width, 32) * 12; 608 602 ctx->codec.h264.deblk_buf = 609 - dma_alloc_coherent(dev->dev, 610 - ctx->codec.h264.deblk_buf_size, 611 - &ctx->codec.h264.deblk_buf_dma, 612 - GFP_KERNEL); 603 + dma_alloc_attrs(dev->dev, 604 + ctx->codec.h264.deblk_buf_size, 605 + &ctx->codec.h264.deblk_buf_dma, 606 + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 613 607 if (!ctx->codec.h264.deblk_buf) { 614 608 ret = -ENOMEM; 615 609 goto err_mv_col_buf; ··· 622 616 ctx->codec.h264.intra_pred_buf_size = 623 617 ALIGN(ctx->src_fmt.width, 64) * 5 * 2; 624 618 ctx->codec.h264.intra_pred_buf = 625 - dma_alloc_coherent(dev->dev, 626 - ctx->codec.h264.intra_pred_buf_size, 627 - &ctx->codec.h264.intra_pred_buf_dma, 628 - GFP_KERNEL); 619 + dma_alloc_attrs(dev->dev, 620 + ctx->codec.h264.intra_pred_buf_size, 621 + &ctx->codec.h264.intra_pred_buf_dma, 622 + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 629 623 if 
(!ctx->codec.h264.intra_pred_buf) { 630 624 ret = -ENOMEM; 631 625 goto err_deblk_buf; ··· 635 629 return 0; 636 630 637 631 err_deblk_buf: 638 - dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, 639 - ctx->codec.h264.deblk_buf, 640 - ctx->codec.h264.deblk_buf_dma); 632 + dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, 633 + ctx->codec.h264.deblk_buf, 634 + ctx->codec.h264.deblk_buf_dma, 635 + DMA_ATTR_NO_KERNEL_MAPPING); 641 636 642 637 err_mv_col_buf: 643 - dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size, 644 - ctx->codec.h264.mv_col_buf, 645 - ctx->codec.h264.mv_col_buf_dma); 638 + dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size, 639 + ctx->codec.h264.mv_col_buf, 640 + ctx->codec.h264.mv_col_buf_dma, 641 + DMA_ATTR_NO_KERNEL_MAPPING); 646 642 647 643 err_neighbor_buf: 648 - dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 649 - ctx->codec.h264.neighbor_info_buf, 650 - ctx->codec.h264.neighbor_info_buf_dma); 644 + dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 645 + ctx->codec.h264.neighbor_info_buf, 646 + ctx->codec.h264.neighbor_info_buf_dma, 647 + DMA_ATTR_NO_KERNEL_MAPPING); 651 648 652 649 err_pic_buf: 653 - dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, 654 - ctx->codec.h264.pic_info_buf, 655 - ctx->codec.h264.pic_info_buf_dma); 650 + dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 651 + ctx->codec.h264.pic_info_buf, 652 + ctx->codec.h264.pic_info_buf_dma, 653 + DMA_ATTR_NO_KERNEL_MAPPING); 656 654 return ret; 657 655 } 658 656 ··· 664 654 { 665 655 struct cedrus_dev *dev = ctx->dev; 666 656 667 - dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size, 668 - ctx->codec.h264.mv_col_buf, 669 - ctx->codec.h264.mv_col_buf_dma); 670 - dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 671 - ctx->codec.h264.neighbor_info_buf, 672 - ctx->codec.h264.neighbor_info_buf_dma); 673 - dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, 674 - 
ctx->codec.h264.pic_info_buf, 675 - ctx->codec.h264.pic_info_buf_dma); 657 + dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size, 658 + ctx->codec.h264.mv_col_buf, 659 + ctx->codec.h264.mv_col_buf_dma, 660 + DMA_ATTR_NO_KERNEL_MAPPING); 661 + dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 662 + ctx->codec.h264.neighbor_info_buf, 663 + ctx->codec.h264.neighbor_info_buf_dma, 664 + DMA_ATTR_NO_KERNEL_MAPPING); 665 + dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 666 + ctx->codec.h264.pic_info_buf, 667 + ctx->codec.h264.pic_info_buf_dma, 668 + DMA_ATTR_NO_KERNEL_MAPPING); 676 669 if (ctx->codec.h264.deblk_buf_size) 677 - dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, 678 - ctx->codec.h264.deblk_buf, 679 - ctx->codec.h264.deblk_buf_dma); 670 + dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, 671 + ctx->codec.h264.deblk_buf, 672 + ctx->codec.h264.deblk_buf_dma, 673 + DMA_ATTR_NO_KERNEL_MAPPING); 680 674 if (ctx->codec.h264.intra_pred_buf_size) 681 - dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size, 682 - ctx->codec.h264.intra_pred_buf, 683 - ctx->codec.h264.intra_pred_buf_dma); 675 + dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size, 676 + ctx->codec.h264.intra_pred_buf, 677 + ctx->codec.h264.intra_pred_buf_dma, 678 + DMA_ATTR_NO_KERNEL_MAPPING); 684 679 } 685 680 686 681 static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
+17 -13
drivers/staging/media/sunxi/cedrus/cedrus_h265.c
··· 350 350 ctx->codec.h265.mv_col_buf_size = num_buffers * 351 351 ctx->codec.h265.mv_col_buf_unit_size; 352 352 353 + /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ 353 354 ctx->codec.h265.mv_col_buf = 354 - dma_alloc_coherent(dev->dev, 355 - ctx->codec.h265.mv_col_buf_size, 356 - &ctx->codec.h265.mv_col_buf_addr, 357 - GFP_KERNEL); 355 + dma_alloc_attrs(dev->dev, 356 + ctx->codec.h265.mv_col_buf_size, 357 + &ctx->codec.h265.mv_col_buf_addr, 358 + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 358 359 if (!ctx->codec.h265.mv_col_buf) { 359 360 ctx->codec.h265.mv_col_buf_size = 0; 360 361 // TODO: Abort the process here. ··· 668 667 /* The buffer size is calculated at setup time. */ 669 668 ctx->codec.h265.mv_col_buf_size = 0; 670 669 670 + /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ 671 671 ctx->codec.h265.neighbor_info_buf = 672 - dma_alloc_coherent(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, 673 - &ctx->codec.h265.neighbor_info_buf_addr, 674 - GFP_KERNEL); 672 + dma_alloc_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, 673 + &ctx->codec.h265.neighbor_info_buf_addr, 674 + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 675 675 if (!ctx->codec.h265.neighbor_info_buf) 676 676 return -ENOMEM; 677 677 ··· 684 682 struct cedrus_dev *dev = ctx->dev; 685 683 686 684 if (ctx->codec.h265.mv_col_buf_size > 0) { 687 - dma_free_coherent(dev->dev, ctx->codec.h265.mv_col_buf_size, 688 - ctx->codec.h265.mv_col_buf, 689 - ctx->codec.h265.mv_col_buf_addr); 685 + dma_free_attrs(dev->dev, ctx->codec.h265.mv_col_buf_size, 686 + ctx->codec.h265.mv_col_buf, 687 + ctx->codec.h265.mv_col_buf_addr, 688 + DMA_ATTR_NO_KERNEL_MAPPING); 690 689 691 690 ctx->codec.h265.mv_col_buf_size = 0; 692 691 } 693 692 694 - dma_free_coherent(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, 695 - ctx->codec.h265.neighbor_info_buf, 696 - ctx->codec.h265.neighbor_info_buf_addr); 693 + dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, 694 + 
ctx->codec.h265.neighbor_info_buf, 695 + ctx->codec.h265.neighbor_info_buf_addr, 696 + DMA_ATTR_NO_KERNEL_MAPPING); 697 697 } 698 698 699 699 static void cedrus_h265_trigger(struct cedrus_ctx *ctx)
+1
drivers/staging/media/sunxi/cedrus/cedrus_video.c
··· 568 568 569 569 src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; 570 570 src_vq->io_modes = VB2_MMAP | VB2_DMABUF; 571 + src_vq->dma_attrs = DMA_ATTR_NO_KERNEL_MAPPING; 571 572 src_vq->drv_priv = ctx; 572 573 src_vq->buf_struct_size = sizeof(struct cedrus_buffer); 573 574 src_vq->ops = &cedrus_qops;