Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu/uvd7: add sriov uvd initialization sequences

Add UVD initialization for SRIOV.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Frank Min and committed by Alex Deucher.
247ac951 7006dde2

+246
+246
drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
/*
 * uvd_v7_0_mmsch_start - hand the MM init descriptor table to the MMSCH
 *
 * @adev: amdgpu device pointer
 * @table: MM table holding the init descriptor (GPU MC address + CPU mapping)
 *
 * Programs the VF mailbox registers with the descriptor location, VMID and
 * size, then kicks the MMSCH (multimedia scheduler) firmware and polls the
 * response mailbox until it acknowledges, for up to ~10 ms (1000 x 10 us).
 *
 * Returns 0 on success, -EBUSY if the MMSCH does not respond in time.
 */
static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size in dwords: header plus both engine tables */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/*
	 * 5, kick off the initialization and wait until
	 * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero.
	 * NOTE(review): 0x10000001 / 0x10000002 are MMSCH v1.0 mailbox
	 * protocol values (request / done-ack) — defined by the MMSCH
	 * firmware interface, not by any visible header here.
	 */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		/* timed out after ~10 ms; report the last mailbox value */
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

/*
 * uvd_v7_0_sriov_start - build the UVD init table for SR-IOV and start MMSCH
 *
 * @adev: amdgpu device pointer
 *
 * Under SR-IOV the guest cannot program UVD registers directly; instead it
 * fills a descriptor table of register writes / read-modify-writes / polls
 * (the MMSCH v1.0 "direct" commands) covering the same bring-up sequence the
 * bare-metal path performs (clock gating off, MC resume, VCPU reset/boot,
 * ring buffer setup), appends it after any VCE table already present in the
 * shared mm_table, then asks the MMSCH firmware to execute it on the
 * hardware's behalf via uvd_v7_0_mmsch_start().
 *
 * Returns 0 on success, -EBUSY if the MMSCH handshake times out, or
 * -EINVAL if the UVD table has already been populated.
 */
static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size, tmp;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} };
	struct mmsch_v1_0_cmd_end end = { {0} };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* only build the UVD table once; a populated table means we ran already */
	if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* place the UVD table right after the header, or after the
		 * VCE table if VCE has already claimed its slot */
		if (header->vce_table_offset == 0 && header->vce_table_size == 0)
			header->uvd_table_offset = header->header_size;
		else
			header->uvd_table_offset = header->vce_table_size + header->vce_table_offset;

		init_table += header->uvd_table_offset;

		ring = &adev->uvd.ring;
		size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);

		/* disable clock gating */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
						   ~UVD_POWER_STATUS__UVD_PG_MODE_MASK, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
						   0xFFFFFFFF, 0x00000004);
		/* mc resume*/
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP-loaded firmware: cache 0 points at the PSP ucode copy */
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
						    lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
						    upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
			offset = 0;
		} else {
			/* driver-loaded firmware lives at the head of the UVD BO */
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
						    lower_32_bits(adev->uvd.gpu_addr));
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
						    upper_32_bits(adev->uvd.gpu_addr));
			offset = size;
		}

		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
					    AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);

		/* cache 1: heap, follows the firmware image */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
					    lower_32_bits(adev->uvd.gpu_addr + offset));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
					    upper_32_bits(adev->uvd.gpu_addr + offset));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);

		/* cache 2: stack + per-session state, follows the heap */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
					    lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
					    upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
					    AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));

		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG),
					    adev->gfx.config.gb_addr_config);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG),
					    adev->gfx.config.gb_addr_config);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG),
					    adev->gfx.config.gb_addr_config);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
		/* mc resume end*/

		/* disable clock gating */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
						   ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);

		/* disable interrupt */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
						   ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);

		/* stall UMC and register bus before resetting VCPU */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
						   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
						   UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

		/* put LMI, VCPU, RBC etc... into reset */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
					    (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
						       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
						       UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
						       UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
						       UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
						       UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
						       UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
						       UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));

		/* initialize UVD memory controller */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
					    (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
						       UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
						       UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
						       UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
						       UVD_LMI_CTRL__REQ_MODE_MASK |
						       0x00100000L));

		/* disable byte swapping */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), 0);

		/* MPC mux programming — values mirror the bare-metal uvd_v7_0_start() path */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88);

		/* take all subblocks out of reset, except VCPU */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
					    UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);

		/* enable VCPU clock */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
					    UVD_VCPU_CNTL__CLK_EN_MASK);

		/* enable UMC */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
						   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);

		/* boot up the VCPU */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);

		/* MMSCH polls until the VCPU reports ready (bit 1 of UVD_STATUS) */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);

		/* enable master interrupt */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
						   ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
						   (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));

		/* clear the bit 4 of UVD_STATUS */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
						   ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);

		/* force RBC into idle state */
		size = order_base_2(ring->ring_size);
		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);

		/* set the write pointer delay */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0);

		/* set the wb address */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR),
					    (upper_32_bits(ring->gpu_addr) >> 2));

		/* program the RB_BASE for ring buffer */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
					    upper_32_bits(ring->gpu_addr));

		ring->wptr = 0;
		/*
		 * encode rings: NOTE(review) RB_BASE_LO/LO2 are written with the
		 * full 64-bit ring->gpu_addr and rely on implicit truncation to
		 * the low 32 bits; lower_32_bits() would make the intent explicit.
		 */
		ring = &adev->uvd.ring_enc[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);

		ring = &adev->uvd.ring_enc[1];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO2), ring->gpu_addr);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE2), ring->ring_size / 4);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->uvd_table_size = table_size;

		return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table);
	}
	return -EINVAL; /* already initialized ? */
}