Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: Reduce number of arguments of dcn30's CalculatePrefetchSchedule()

After an innocuous optimization change in clang-22,
dml30_ModeSupportAndSystemConfigurationFull() exceeds the 2048-byte
stack frame limit for display_mode_vba_30.c.

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3529:6: warning: stack frame size (2096) exceeds limit (2048) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than]
3529 | void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
| ^

With clang-21, this function was already close to the 2048-byte limit
at 1912 bytes, as shown by a build with the limit set to 1586 bytes:

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3529:6: warning: stack frame size (1912) exceeds limit (1586) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than]
3529 | void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
| ^

CalculatePrefetchSchedule() has a large number of parameters, most of
which must be passed on the stack. Most of the arguments are the same
at both call sites and come from fields of mode_lib->vba, so the
function can read them through the existing mode_lib pointer (indexing
per-plane values with a new plane index argument, k) instead of taking
them as explicit arguments. Doing this reduces the stack size of
dml30_ModeSupportAndSystemConfigurationFull() from 2096 bytes to 1912
bytes with clang-22.

Closes: https://github.com/ClangBuiltLinux/linux/issues/2117
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit b20b3fc4210f83089f835cdb91deec4b0778761a)

authored by

Nathan Chancellor and committed by
Alex Deucher
f54a91f5 70740454

+73 -185
+73 -185
drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
··· 77 77 static unsigned int dscComputeDelay( 78 78 enum output_format_class pixelFormat, 79 79 enum output_encoder_class Output); 80 - // Super monster function with some 45 argument 81 80 static bool CalculatePrefetchSchedule( 82 81 struct display_mode_lib *mode_lib, 83 - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 84 - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 82 + unsigned int k, 85 83 Pipe *myPipe, 86 84 unsigned int DSCDelay, 87 - double DPPCLKDelaySubtotalPlusCNVCFormater, 88 - double DPPCLKDelaySCL, 89 - double DPPCLKDelaySCLLBOnly, 90 - double DPPCLKDelayCNVCCursor, 91 - double DISPCLKDelaySubtotal, 92 85 unsigned int DPP_RECOUT_WIDTH, 93 - enum output_format_class OutputFormat, 94 - unsigned int MaxInterDCNTileRepeaters, 95 86 unsigned int VStartup, 96 87 unsigned int MaxVStartup, 97 - unsigned int GPUVMPageTableLevels, 98 - bool GPUVMEnable, 99 - bool HostVMEnable, 100 - unsigned int HostVMMaxNonCachedPageTableLevels, 101 - double HostVMMinPageSize, 102 - bool DynamicMetadataEnable, 103 - bool DynamicMetadataVMEnabled, 104 - int DynamicMetadataLinesBeforeActiveRequired, 105 - unsigned int DynamicMetadataTransmittedBytes, 106 88 double UrgentLatency, 107 89 double UrgentExtraLatency, 108 90 double TCalc, ··· 98 116 unsigned int MaxNumSwathY, 99 117 double PrefetchSourceLinesC, 100 118 unsigned int SwathWidthC, 101 - int BytePerPixelC, 102 119 double VInitPreFillC, 103 120 unsigned int MaxNumSwathC, 104 121 long swath_width_luma_ub, ··· 105 124 unsigned int SwathHeightY, 106 125 unsigned int SwathHeightC, 107 126 double TWait, 108 - bool ProgressiveToInterlaceUnitInOPP, 109 - double *DSTXAfterScaler, 110 - double *DSTYAfterScaler, 111 127 double *DestinationLinesForPrefetch, 112 128 double *PrefetchBandwidth, 113 129 double *DestinationLinesToRequestVMInVBlank, ··· 113 135 double *VRatioPrefetchC, 114 136 double *RequiredPrefetchPixDataBWLuma, 115 137 double 
*RequiredPrefetchPixDataBWChroma, 116 - bool *NotEnoughTimeForDynamicMetadata, 117 - double *Tno_bw, 118 - double *prefetch_vmrow_bw, 119 - double *Tdmdl_vm, 120 - double *Tdmdl, 121 - unsigned int *VUpdateOffsetPix, 122 - double *VUpdateWidthPix, 123 - double *VReadyOffsetPix); 138 + bool *NotEnoughTimeForDynamicMetadata); 124 139 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 125 140 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 126 141 static void CalculateDCCConfiguration( ··· 781 810 782 811 static bool CalculatePrefetchSchedule( 783 812 struct display_mode_lib *mode_lib, 784 - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 785 - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 813 + unsigned int k, 786 814 Pipe *myPipe, 787 815 unsigned int DSCDelay, 788 - double DPPCLKDelaySubtotalPlusCNVCFormater, 789 - double DPPCLKDelaySCL, 790 - double DPPCLKDelaySCLLBOnly, 791 - double DPPCLKDelayCNVCCursor, 792 - double DISPCLKDelaySubtotal, 793 816 unsigned int DPP_RECOUT_WIDTH, 794 - enum output_format_class OutputFormat, 795 - unsigned int MaxInterDCNTileRepeaters, 796 817 unsigned int VStartup, 797 818 unsigned int MaxVStartup, 798 - unsigned int GPUVMPageTableLevels, 799 - bool GPUVMEnable, 800 - bool HostVMEnable, 801 - unsigned int HostVMMaxNonCachedPageTableLevels, 802 - double HostVMMinPageSize, 803 - bool DynamicMetadataEnable, 804 - bool DynamicMetadataVMEnabled, 805 - int DynamicMetadataLinesBeforeActiveRequired, 806 - unsigned int DynamicMetadataTransmittedBytes, 807 819 double UrgentLatency, 808 820 double UrgentExtraLatency, 809 821 double TCalc, ··· 800 846 unsigned int MaxNumSwathY, 801 847 double PrefetchSourceLinesC, 802 848 unsigned int SwathWidthC, 803 - int BytePerPixelC, 804 849 double VInitPreFillC, 805 850 unsigned int MaxNumSwathC, 806 851 long swath_width_luma_ub, ··· 807 854 unsigned int SwathHeightY, 808 855 unsigned int 
SwathHeightC, 809 856 double TWait, 810 - bool ProgressiveToInterlaceUnitInOPP, 811 - double *DSTXAfterScaler, 812 - double *DSTYAfterScaler, 813 857 double *DestinationLinesForPrefetch, 814 858 double *PrefetchBandwidth, 815 859 double *DestinationLinesToRequestVMInVBlank, ··· 815 865 double *VRatioPrefetchC, 816 866 double *RequiredPrefetchPixDataBWLuma, 817 867 double *RequiredPrefetchPixDataBWChroma, 818 - bool *NotEnoughTimeForDynamicMetadata, 819 - double *Tno_bw, 820 - double *prefetch_vmrow_bw, 821 - double *Tdmdl_vm, 822 - double *Tdmdl, 823 - unsigned int *VUpdateOffsetPix, 824 - double *VUpdateWidthPix, 825 - double *VReadyOffsetPix) 868 + bool *NotEnoughTimeForDynamicMetadata) 826 869 { 870 + struct vba_vars_st *v = &mode_lib->vba; 871 + double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; 827 872 bool MyError = false; 828 873 unsigned int DPPCycles = 0, DISPCLKCycles = 0; 829 874 double DSTTotalPixelsAfterScaler = 0; ··· 850 905 double Tdmec = 0; 851 906 double Tdmsks = 0; 852 907 853 - if (GPUVMEnable == true && HostVMEnable == true) { 854 - HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; 855 - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 908 + if (v->GPUVMEnable == true && v->HostVMEnable == true) { 909 + HostVMInefficiencyFactor = v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; 910 + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 856 911 } else { 857 912 HostVMInefficiencyFactor = 1; 858 913 HostVMDynamicLevelsTrips = 0; 859 914 } 860 915 861 916 CalculateDynamicMetadataParameters( 862 - MaxInterDCNTileRepeaters, 917 + v->MaxInterDCNTileRepeaters, 863 918 myPipe->DPPCLK, 864 919 myPipe->DISPCLK, 865 920 myPipe->DCFCLKDeepSleep, 866 
921 myPipe->PixelClock, 867 922 myPipe->HTotal, 868 923 myPipe->VBlank, 869 - DynamicMetadataTransmittedBytes, 870 - DynamicMetadataLinesBeforeActiveRequired, 924 + v->DynamicMetadataTransmittedBytes[k], 925 + v->DynamicMetadataLinesBeforeActiveRequired[k], 871 926 myPipe->InterlaceEnable, 872 - ProgressiveToInterlaceUnitInOPP, 927 + v->ProgressiveToInterlaceUnitInOPP, 873 928 &Tsetup, 874 929 &Tdmbf, 875 930 &Tdmec, ··· 877 932 878 933 LineTime = myPipe->HTotal / myPipe->PixelClock; 879 934 trip_to_mem = UrgentLatency; 880 - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 935 + Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 881 936 882 - if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 883 - *Tdmdl = TWait + Tvm_trips + trip_to_mem; 937 + if (v->DynamicMetadataVMEnabled == true && v->GPUVMEnable == true) { 938 + v->Tdmdl[k] = TWait + Tvm_trips + trip_to_mem; 884 939 } else { 885 - *Tdmdl = TWait + UrgentExtraLatency; 940 + v->Tdmdl[k] = TWait + UrgentExtraLatency; 886 941 } 887 942 888 - if (DynamicMetadataEnable == true) { 889 - if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 943 + if (v->DynamicMetadataEnable[k] == true) { 944 + if (VStartup * LineTime < Tsetup + v->Tdmdl[k] + Tdmbf + Tdmec + Tdmsks) { 890 945 *NotEnoughTimeForDynamicMetadata = true; 891 946 } else { 892 947 *NotEnoughTimeForDynamicMetadata = false; ··· 894 949 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 895 950 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 896 951 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); 897 - dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); 952 + dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", v->Tdmdl[k]); 
898 953 } 899 954 } else { 900 955 *NotEnoughTimeForDynamicMetadata = false; 901 956 } 902 957 903 - *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); 958 + v->Tdmdl_vm[k] = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && v->GPUVMEnable == true ? TWait + Tvm_trips : 0); 904 959 905 960 if (myPipe->ScalerEnabled) 906 - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 961 + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; 907 962 else 908 - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 963 + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; 909 964 910 - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 965 + DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; 911 966 912 - DISPCLKCycles = DISPCLKDelaySubtotal; 967 + DISPCLKCycles = v->DISPCLKDelaySubtotal; 913 968 914 969 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 915 970 return true; 916 971 917 - *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK 972 + v->DSTXAfterScaler[k] = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK 918 973 + DSCDelay; 919 974 920 - *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 975 + v->DSTXAfterScaler[k] = v->DSTXAfterScaler[k] + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 921 976 922 - if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) 923 - *DSTYAfterScaler = 1; 977 + if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && v->ProgressiveToInterlaceUnitInOPP)) 978 + v->DSTYAfterScaler[k] = 1; 924 979 else 925 - *DSTYAfterScaler = 0; 980 + 
v->DSTYAfterScaler[k] = 0; 926 981 927 - DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 928 - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 929 - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 982 + DSTTotalPixelsAfterScaler = v->DSTYAfterScaler[k] * myPipe->HTotal + v->DSTXAfterScaler[k]; 983 + v->DSTYAfterScaler[k] = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 984 + v->DSTXAfterScaler[k] = DSTTotalPixelsAfterScaler - ((double) (v->DSTYAfterScaler[k] * myPipe->HTotal)); 930 985 931 986 MyError = false; 932 987 ··· 935 990 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 936 991 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 937 992 938 - if (GPUVMEnable) { 939 - if (GPUVMPageTableLevels >= 3) { 940 - *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 993 + if (v->GPUVMEnable) { 994 + if (v->GPUVMMaxPageTableLevels >= 3) { 995 + v->Tno_bw[k] = UrgentExtraLatency + trip_to_mem * ((v->GPUVMMaxPageTableLevels - 2) - 1); 941 996 } else 942 - *Tno_bw = 0; 997 + v->Tno_bw[k] = 0; 943 998 } else if (!myPipe->DCCEnable) 944 - *Tno_bw = LineTime; 999 + v->Tno_bw[k] = LineTime; 945 1000 else 946 - *Tno_bw = LineTime / 4; 1001 + v->Tno_bw[k] = LineTime / 4; 947 1002 948 - dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime 949 - - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1003 + dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, v->Tdmdl[k])) / LineTime 1004 + - (v->DSTYAfterScaler[k] + v->DSTXAfterScaler[k] / myPipe->HTotal); 950 1005 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH 951 1006 952 1007 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC); 953 1008 Tsw_oto = Lsw_oto * LineTime; 954 1009 955 - prefetch_bw_oto = 
(PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto; 1010 + prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) / Tsw_oto; 956 1011 957 - if (GPUVMEnable == true) { 958 - Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 1012 + if (v->GPUVMEnable == true) { 1013 + Tvm_oto = dml_max3(v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 959 1014 Tvm_trips, 960 1015 LineTime / 4.0); 961 1016 } else 962 1017 Tvm_oto = LineTime / 4.0; 963 1018 964 - if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1019 + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 965 1020 Tr0_oto = dml_max3( 966 1021 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 967 1022 LineTime - Tvm_oto, LineTime / 4); ··· 987 1042 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 988 1043 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 989 1044 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); 990 - dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); 991 - dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); 992 - dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); 993 - dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler); 1045 + dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", v->Tdmdl_vm[k]); 1046 + dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", v->Tdmdl[k]); 1047 + dml_print("DML: dst_x_after_scl: %f pixels - number 
of pixel clocks pipeline and buffer delay after scaler \n", v->DSTXAfterScaler[k]); 1048 + dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)v->DSTYAfterScaler[k]); 994 1049 995 1050 *PrefetchBandwidth = 0; 996 1051 *DestinationLinesToRequestVMInVBlank = 0; ··· 1004 1059 double PrefetchBandwidth3 = 0; 1005 1060 double PrefetchBandwidth4 = 0; 1006 1061 1007 - if (Tpre_rounded - *Tno_bw > 0) 1062 + if (Tpre_rounded - v->Tno_bw[k] > 0) 1008 1063 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 1009 1064 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1010 1065 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY 1011 - + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) 1012 - / (Tpre_rounded - *Tno_bw); 1066 + + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) 1067 + / (Tpre_rounded - v->Tno_bw[k]); 1013 1068 else 1014 1069 PrefetchBandwidth1 = 0; 1015 1070 1016 - if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) { 1017 - PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw); 1071 + if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - v->Tno_bw[k]) > 0) { 1072 + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - v->Tno_bw[k]); 1018 1073 } 1019 1074 1020 - if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1075 + if (Tpre_rounded - v->Tno_bw[k] - 2 * Tr0_trips_rounded > 0) 1021 1076 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * 1022 1077 HostVMInefficiencyFactor + 
PrefetchSourceLinesY * 1023 1078 swath_width_luma_ub * BytePerPixelY + 1024 1079 PrefetchSourceLinesC * swath_width_chroma_ub * 1025 - BytePerPixelC) / 1026 - (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1080 + v->BytePerPixelC[k]) / 1081 + (Tpre_rounded - v->Tno_bw[k] - 2 * Tr0_trips_rounded); 1027 1082 else 1028 1083 PrefetchBandwidth2 = 0; 1029 1084 ··· 1031 1086 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * 1032 1087 HostVMInefficiencyFactor + PrefetchSourceLinesY * 1033 1088 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * 1034 - swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded - 1089 + swath_width_chroma_ub * v->BytePerPixelC[k]) / (Tpre_rounded - 1035 1090 Tvm_trips_rounded); 1036 1091 else 1037 1092 PrefetchBandwidth3 = 0; ··· 1041 1096 } 1042 1097 1043 1098 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1044 - PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) 1099 + PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) 1045 1100 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1046 1101 else 1047 1102 PrefetchBandwidth4 = 0; ··· 1052 1107 bool Case3OK; 1053 1108 1054 1109 if (PrefetchBandwidth1 > 0) { 1055 - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 1110 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 1056 1111 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1057 1112 Case1OK = true; 1058 1113 } else { ··· 1063 1118 } 1064 1119 1065 1120 if (PrefetchBandwidth2 > 0) { 1066 - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 1121 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * 
HostVMInefficiencyFactor / PrefetchBandwidth2 1067 1122 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1068 1123 Case2OK = true; 1069 1124 } else { ··· 1074 1129 } 1075 1130 1076 1131 if (PrefetchBandwidth3 > 0) { 1077 - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 1132 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 1078 1133 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1079 1134 Case3OK = true; 1080 1135 } else { ··· 1097 1152 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ); 1098 1153 1099 1154 if (prefetch_bw_equ > 0) { 1100 - if (GPUVMEnable) { 1101 - Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1155 + if (v->GPUVMEnable) { 1156 + Tvm_equ = dml_max3(v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1102 1157 } else { 1103 1158 Tvm_equ = LineTime / 4; 1104 1159 } 1105 1160 1106 - if ((GPUVMEnable || myPipe->DCCEnable)) { 1161 + if ((v->GPUVMEnable || myPipe->DCCEnable)) { 1107 1162 Tr0_equ = dml_max4( 1108 1163 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1109 1164 Tr0_trips, ··· 1172 1227 } 1173 1228 1174 1229 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime; 1175 - *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime; 1230 + *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * v->BytePerPixelC[k] * swath_width_chroma_ub / LineTime; 1176 1231 } else { 1177 1232 MyError = true; 1178 1233 
dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); ··· 1188 1243 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1189 1244 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1190 1245 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime); 1191 - dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 1246 + dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (v->DSTYAfterScaler[k] + ((v->DSTXAfterScaler[k]) / (double) myPipe->HTotal)) * LineTime); 1192 1247 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n"); 1193 - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup); 1248 + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (v->DSTYAfterScaler[k] + ((v->DSTXAfterScaler[k]) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup); 1194 1249 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1195 1250 1196 1251 } else { ··· 1221 1276 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1222 1277 } 1223 1278 1224 - *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1279 + v->prefetch_vmrow_bw[k] = dml_max(prefetch_vm_bw, prefetch_row_bw); 1225 1280 } 1226 1281 1227 1282 if (MyError) { ··· 2382 2437 2383 2438 v->ErrorResult[k] = 
CalculatePrefetchSchedule( 2384 2439 mode_lib, 2385 - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 2386 - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 2440 + k, 2387 2441 &myPipe, 2388 2442 v->DSCDelay[k], 2389 - v->DPPCLKDelaySubtotal 2390 - + v->DPPCLKDelayCNVCFormater, 2391 - v->DPPCLKDelaySCL, 2392 - v->DPPCLKDelaySCLLBOnly, 2393 - v->DPPCLKDelayCNVCCursor, 2394 - v->DISPCLKDelaySubtotal, 2395 2443 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2396 - v->OutputFormat[k], 2397 - v->MaxInterDCNTileRepeaters, 2398 2444 dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2399 2445 v->MaxVStartupLines[k], 2400 - v->GPUVMMaxPageTableLevels, 2401 - v->GPUVMEnable, 2402 - v->HostVMEnable, 2403 - v->HostVMMaxNonCachedPageTableLevels, 2404 - v->HostVMMinPageSize, 2405 - v->DynamicMetadataEnable[k], 2406 - v->DynamicMetadataVMEnabled, 2407 - v->DynamicMetadataLinesBeforeActiveRequired[k], 2408 - v->DynamicMetadataTransmittedBytes[k], 2409 2446 v->UrgentLatency, 2410 2447 v->UrgentExtraLatency, 2411 2448 v->TCalc, ··· 2401 2474 v->MaxNumSwathY[k], 2402 2475 v->PrefetchSourceLinesC[k], 2403 2476 v->SwathWidthC[k], 2404 - v->BytePerPixelC[k], 2405 2477 v->VInitPreFillC[k], 2406 2478 v->MaxNumSwathC[k], 2407 2479 v->swath_width_luma_ub[k], ··· 2408 2482 v->SwathHeightY[k], 2409 2483 v->SwathHeightC[k], 2410 2484 TWait, 2411 - v->ProgressiveToInterlaceUnitInOPP, 2412 - &v->DSTXAfterScaler[k], 2413 - &v->DSTYAfterScaler[k], 2414 2485 &v->DestinationLinesForPrefetch[k], 2415 2486 &v->PrefetchBandwidth[k], 2416 2487 &v->DestinationLinesToRequestVMInVBlank[k], ··· 2416 2493 &v->VRatioPrefetchC[k], 2417 2494 &v->RequiredPrefetchPixDataBWLuma[k], 2418 2495 &v->RequiredPrefetchPixDataBWChroma[k], 2419 - &v->NotEnoughTimeForDynamicMetadata[k], 2420 - &v->Tno_bw[k], 2421 - &v->prefetch_vmrow_bw[k], 2422 - &v->Tdmdl_vm[k], 2423 - &v->Tdmdl[k], 2424 - &v->VUpdateOffsetPix[k], 2425 - &v->VUpdateWidthPix[k], 2426 - 
&v->VReadyOffsetPix[k]); 2496 + &v->NotEnoughTimeForDynamicMetadata[k]); 2427 2497 if (v->BlendingAndTiming[k] == k) { 2428 2498 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK); 2429 2499 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k]; ··· 4686 4770 4687 4771 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 4688 4772 mode_lib, 4689 - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, 4690 - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, 4773 + k, 4691 4774 &myPipe, 4692 4775 v->DSCDelayPerState[i][k], 4693 - v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 4694 - v->DPPCLKDelaySCL, 4695 - v->DPPCLKDelaySCLLBOnly, 4696 - v->DPPCLKDelayCNVCCursor, 4697 - v->DISPCLKDelaySubtotal, 4698 4776 v->SwathWidthYThisState[k] / v->HRatio[k], 4699 - v->OutputFormat[k], 4700 - v->MaxInterDCNTileRepeaters, 4701 4777 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 4702 4778 v->MaximumVStartup[i][j][k], 4703 - v->GPUVMMaxPageTableLevels, 4704 - v->GPUVMEnable, 4705 - v->HostVMEnable, 4706 - v->HostVMMaxNonCachedPageTableLevels, 4707 - v->HostVMMinPageSize, 4708 - v->DynamicMetadataEnable[k], 4709 - v->DynamicMetadataVMEnabled, 4710 - v->DynamicMetadataLinesBeforeActiveRequired[k], 4711 - v->DynamicMetadataTransmittedBytes[k], 4712 4779 v->UrgLatency[i], 4713 4780 v->ExtraLatency, 4714 4781 v->TimeCalc, ··· 4705 4806 v->MaxNumSwY[k], 4706 4807 v->PrefetchLinesC[i][j][k], 4707 4808 v->SwathWidthCThisState[k], 4708 - v->BytePerPixelC[k], 4709 4809 v->PrefillC[k], 4710 4810 v->MaxNumSwC[k], 4711 4811 v->swath_width_luma_ub_this_state[k], ··· 4712 4814 v->SwathHeightYThisState[k], 4713 4815 v->SwathHeightCThisState[k], 4714 4816 v->TWait, 4715 - v->ProgressiveToInterlaceUnitInOPP, 4716 - &v->DSTXAfterScaler[k], 4717 - &v->DSTYAfterScaler[k], 4718 4817 &v->LineTimesForPrefetch[k], 4719 4818 
&v->PrefetchBW[k], 4720 4819 &v->LinesForMetaPTE[k], ··· 4720 4825 &v->VRatioPreC[i][j][k], 4721 4826 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 4722 4827 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 4723 - &v->NoTimeForDynamicMetadata[i][j][k], 4724 - &v->Tno_bw[k], 4725 - &v->prefetch_vmrow_bw[k], 4726 - &v->Tdmdl_vm[k], 4727 - &v->Tdmdl[k], 4728 - &v->VUpdateOffsetPix[k], 4729 - &v->VUpdateWidthPix[k], 4730 - &v->VReadyOffsetPix[k]); 4828 + &v->NoTimeForDynamicMetadata[i][j][k]); 4731 4829 } 4732 4830 4733 4831 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {